llvm-mirror/lib/Target/AArch64/AArch64Subtarget.h
Commit e25a1a8f41 by Cullen Rhodes
[AArch64] Add target features for Armv9-A Scalable Matrix Extension (SME)
First patch in a series adding MC layer support for the Arm Scalable
Matrix Extension.

This patch adds the following features:

    sme, sme-i64, sme-f64

The sme-i64 and sme-f64 flags are for the optional I16I64 and F64F64
features.

If a target supports I16I64 then the following instructions are
implemented:

  * 64-bit integer ADDHA and ADDVA variants (D105570).
  * SMOPA, SMOPS, SUMOPA, SUMOPS, UMOPA, UMOPS, USMOPA, and USMOPS
    instructions that accumulate 16-bit integer outer products into 64-bit
    integer tiles.

If a target supports F64F64 then the FMOPA and FMOPS instructions that
accumulate double-precision floating-point outer products into
double-precision tiles are implemented.

Outer products are implemented in D105571.
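
For illustration only (not part of this patch), here is a minimal sketch of
how backend code might gate on the optional features through the subtarget
accessors declared in AArch64Subtarget.h; the helper names below are
hypothetical:

    // Hypothetical helpers, shown only to illustrate the new accessors.
    #include "AArch64Subtarget.h"

    static bool canUseSMEI64OuterProducts(const llvm::AArch64Subtarget &ST) {
      // 64-bit ADDHA/ADDVA and the widening integer outer products need the
      // optional I16I64 feature (+sme-i64).
      return ST.hasSME() && ST.hasSMEI64();
    }

    static bool canUseSMEF64OuterProducts(const llvm::AArch64Subtarget &ST) {
      // Double-precision FMOPA/FMOPS need the optional F64F64 feature
      // (+sme-f64).
      return ST.hasSME() && ST.hasSMEF64();
    }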

The reference can be found here:
https://developer.arm.com/documentation/ddi0602/2021-06

Reviewed By: CarolineConcatto

Differential Revision: https://reviews.llvm.org/D105569
2021-07-12 13:28:10 +00:00


//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
#include "AArch64FrameLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64SelectionDAGInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "AArch64GenSubtargetInfo.inc"
namespace llvm {
class GlobalValue;
class StringRef;
class Triple;
class AArch64Subtarget final : public AArch64GenSubtargetInfo {
public:
enum ARMProcFamilyEnum : uint8_t {
  Others,
  A64FX,
  AppleA7,
  AppleA10,
  AppleA11,
  AppleA12,
  AppleA13,
  AppleA14,
  Carmel,
  CortexA35,
  CortexA53,
  CortexA55,
  CortexA57,
  CortexA65,
  CortexA72,
  CortexA73,
  CortexA75,
  CortexA76,
  CortexA77,
  CortexA78,
  CortexA78C,
  CortexR82,
  CortexX1,
  ExynosM3,
  Falkor,
  Kryo,
  NeoverseE1,
  NeoverseN1,
  NeoverseN2,
  NeoverseV1,
  Saphira,
  ThunderX2T99,
  ThunderX,
  ThunderXT81,
  ThunderXT83,
  ThunderXT88,
  ThunderX3T110,
  TSV110
};
protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily = Others;
bool HasV8_1aOps = false;
bool HasV8_2aOps = false;
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
bool HasV8_7aOps = false;
bool HasV8_0rOps = false;
bool HasCONTEXTIDREL2 = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
bool HasCrypto = false;
bool HasDotProd = false;
bool HasCRC = false;
bool HasLSE = false;
bool HasRAS = false;
bool HasRDM = false;
bool HasPerfMon = false;
bool HasFullFP16 = false;
bool HasFP16FML = false;
bool HasSPE = false;
// ARMv8.1 extensions
bool HasVH = false;
bool HasPAN = false;
bool HasLOR = false;
// ARMv8.2 extensions
bool HasPsUAO = false;
bool HasPAN_RWV = false;
bool HasCCPP = false;
// SVE extensions
bool HasSVE = false;
bool UseExperimentalZeroingPseudos = false;
// Armv8.2 Crypto extensions
bool HasSM4 = false;
bool HasSHA3 = false;
bool HasSHA2 = false;
bool HasAES = false;
// ARMv8.3 extensions
bool HasPAuth = false;
bool HasJS = false;
bool HasCCIDX = false;
bool HasComplxNum = false;
// ARMv8.4 extensions
bool HasNV = false;
bool HasMPAM = false;
bool HasDIT = false;
bool HasTRACEV8_4 = false;
bool HasAM = false;
bool HasSEL2 = false;
bool HasPMU = false;
bool HasTLB_RMI = false;
bool HasFlagM = false;
bool HasRCPC_IMMO = false;
bool HasLSLFast = false;
bool HasRCPC = false;
bool HasAggressiveFMA = false;
// Armv8.5-A Extensions
bool HasAlternativeNZCV = false;
bool HasFRInt3264 = false;
bool HasSpecRestrict = false;
bool HasSSBS = false;
bool HasSB = false;
bool HasPredRes = false;
bool HasCCDP = false;
bool HasBTI = false;
bool HasRandGen = false;
bool HasMTE = false;
bool HasTME = false;
// Armv8.6-A Extensions
bool HasBF16 = false;
bool HasMatMulInt8 = false;
bool HasMatMulFP32 = false;
bool HasMatMulFP64 = false;
bool HasAMVS = false;
bool HasFineGrainedTraps = false;
bool HasEnhancedCounterVirtualization = false;
// Armv8.7-A Extensions
bool HasXS = false;
bool HasWFxT = false;
bool HasHCX = false;
bool HasLS64 = false;
// Arm SVE2 extensions
bool HasSVE2 = false;
bool HasSVE2AES = false;
bool HasSVE2SM4 = false;
bool HasSVE2SHA3 = false;
bool HasSVE2BitPerm = false;
// Armv9-A Extensions
bool HasRME = false;
// Arm Scalable Matrix Extension (SME)
bool HasSME = false;
bool HasSMEF64 = false;
bool HasSMEI64 = false;
// Future architecture extensions.
bool HasETE = false;
bool HasTRBE = false;
bool HasBRBE = false;
bool HasPAUTH = false;
bool HasSPE_EEF = false;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing = false;
bool HasZeroCycleZeroingGP = false;
bool HasZeroCycleZeroingFPWorkaround = false;
// It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0",
// as movi is more efficient across all cores. Newer cores can eliminate
// fmovs early and there is no difference with movi, but this is not true
// for all implementations.
bool HasZeroCycleZeroingFP = true;
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
// NegativeImmediates - transform instructions with negative immediates
bool NegativeImmediates = true;
// Enable 64-bit vectorization in SLP.
unsigned MinVectorRegisterBitWidth = 64;
bool OutlineAtomics = false;
bool PredictableSelectIsExpensive = false;
bool BalanceFPOps = false;
bool CustomAsCheapAsMove = false;
bool ExynosAsCheapAsMove = false;
bool UsePostRAScheduler = false;
bool Misaligned128StoreIsSlow = false;
bool Paired128IsSlow = false;
bool STRQroIsSlow = false;
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
bool HasCmpBccFusion = false;
bool HasFuseAddress = false;
bool HasFuseAES = false;
bool HasFuseArithmeticLogic = false;
bool HasFuseCCSelect = false;
bool HasFuseCryptoEOR = false;
bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
bool Force32BitJumpTables = false;
bool UseEL1ForTP = false;
bool UseEL2ForTP = false;
bool UseEL3ForTP = false;
bool AllowTaggedGlobals = false;
bool HardenSlsRetBr = false;
bool HardenSlsBlr = false;
bool HardenSlsNoComdat = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
uint16_t PrefetchDistance = 0;
uint16_t MinPrefetchStride = 1;
unsigned MaxPrefetchIterationsAhead = UINT_MAX;
unsigned PrefFunctionLogAlignment = 0;
unsigned PrefLoopLogAlignment = 0;
unsigned MaxJumpTableSize = 0;
unsigned WideningBaseCost = 0;
// ReserveXRegister[i] - X#i is not available as a general purpose register.
BitVector ReserveXRegister;
// CustomCallSavedXRegs[i] - X#i is call saved.
BitVector CustomCallSavedXRegs;
bool IsLittle;
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
AArch64FrameLowering FrameLowering;
AArch64InstrInfo InstrInfo;
AArch64SelectionDAGInfo TSInfo;
AArch64TargetLowering TLInfo;
/// GlobalISel related APIs.
std::unique_ptr<CallLowering> CallLoweringInfo;
std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
private:
/// initializeSubtargetDependencies - Initializes using CPUString and the
/// passed in feature string so that we can use initializer lists for
/// subtarget initialization.
AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
                                                  StringRef CPUString);
/// Initialize properties based on the selected processor family.
void initializeProperties();
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
AArch64Subtarget(const Triple &TT, const std::string &CPU,
                 const std::string &FS, const TargetMachine &TM,
                 bool LittleEndian,
                 unsigned MinSVEVectorSizeInBitsOverride = 0,
                 unsigned MaxSVEVectorSizeInBitsOverride = 0);
const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
  return &TSInfo;
}
const AArch64FrameLowering *getFrameLowering() const override {
  return &FrameLowering;
}
const AArch64TargetLowering *getTargetLowering() const override {
  return &TLInfo;
}
const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
const AArch64RegisterInfo *getRegisterInfo() const override {
  return &getInstrInfo()->getRegisterInfo();
}
const CallLowering *getCallLowering() const override;
const InlineAsmLowering *getInlineAsmLowering() const override;
InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostRAScheduler() const override {
  return UsePostRAScheduler;
}
/// Returns ARM processor family.
/// Avoid this function! CPU specifics should be kept local to this class
/// and preferably modeled with SubtargetFeatures or properties in
/// initializeProperties().
ARMProcFamilyEnum getProcFamily() const {
  return ARMProcFamily;
}
bool hasV8_1aOps() const { return HasV8_1aOps; }
bool hasV8_2aOps() const { return HasV8_2aOps; }
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasV8_0rOps() const { return HasV8_0rOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
bool hasZeroCycleZeroingGP() const { return HasZeroCycleZeroingGP; }
bool hasZeroCycleZeroingFP() const { return HasZeroCycleZeroingFP; }
bool hasZeroCycleZeroingFPWorkaround() const {
  return HasZeroCycleZeroingFPWorkaround;
}
bool requiresStrictAlign() const { return StrictAlign; }
bool isXRaySupported() const override { return true; }
unsigned getMinVectorRegisterBitWidth() const {
  return MinVectorRegisterBitWidth;
}
bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
unsigned getNumXRegisterReserved() const { return ReserveXRegister.count(); }
bool isXRegCustomCalleeSaved(size_t i) const {
  return CustomCallSavedXRegs[i];
}
bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasDotProd() const { return HasDotProd; }
bool hasCRC() const { return HasCRC; }
bool hasLSE() const { return HasLSE; }
bool hasRAS() const { return HasRAS; }
bool hasRDM() const { return HasRDM; }
bool hasSM4() const { return HasSM4; }
bool hasSHA3() const { return HasSHA3; }
bool hasSHA2() const { return HasSHA2; }
bool hasAES() const { return HasAES; }
bool hasCONTEXTIDREL2() const { return HasCONTEXTIDREL2; }
bool balanceFPOps() const { return BalanceFPOps; }
bool predictableSelectIsExpensive() const {
  return PredictableSelectIsExpensive;
}
bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
bool hasExynosCheapAsMoveHandling() const { return ExynosAsCheapAsMove; }
bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
bool isPaired128Slow() const { return Paired128IsSlow; }
bool isSTRQroSlow() const { return STRQroIsSlow; }
bool useAlternateSExtLoadCVTF32Pattern() const {
  return UseAlternateSExtLoadCVTF32Pattern;
}
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
bool hasCmpBccFusion() const { return HasCmpBccFusion; }
bool hasFuseAddress() const { return HasFuseAddress; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; }
bool hasFuseCCSelect() const { return HasFuseCCSelect; }
bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
bool hasFusion() const {
  return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
         hasFuseAES() || hasFuseArithmeticLogic() ||
         hasFuseCCSelect() || hasFuseLiterals();
}
bool hardenSlsRetBr() const { return HardenSlsRetBr; }
bool hardenSlsBlr() const { return HardenSlsBlr; }
bool hardenSlsNoComdat() const { return HardenSlsNoComdat; }
bool useEL1ForTP() const { return UseEL1ForTP; }
bool useEL2ForTP() const { return UseEL2ForTP; }
bool useEL3ForTP() const { return UseEL3ForTP; }
bool useRSqrt() const { return UseRSqrt; }
bool force32BitJumpTables() const { return Force32BitJumpTables; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
  return VectorInsertExtractBaseCost;
}
unsigned getCacheLineSize() const override { return CacheLineSize; }
unsigned getPrefetchDistance() const override { return PrefetchDistance; }
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                              unsigned NumStridedMemAccesses,
                              unsigned NumPrefetches,
                              bool HasCall) const override {
  return MinPrefetchStride;
}
unsigned getMaxPrefetchIterationsAhead() const override {
  return MaxPrefetchIterationsAhead;
}
unsigned getPrefFunctionLogAlignment() const {
  return PrefFunctionLogAlignment;
}
unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }
unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
unsigned getWideningBaseCost() const { return WideningBaseCost; }
bool useExperimentalZeroingPseudos() const {
  return UseExperimentalZeroingPseudos;
}
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;
bool hasPerfMon() const { return HasPerfMon; }
bool hasFullFP16() const { return HasFullFP16; }
bool hasFP16FML() const { return HasFP16FML; }
bool hasSPE() const { return HasSPE; }
bool hasLSLFast() const { return HasLSLFast; }
bool hasSVE() const { return HasSVE; }
bool hasSVE2() const { return HasSVE2; }
bool hasRCPC() const { return HasRCPC; }
bool hasAggressiveFMA() const { return HasAggressiveFMA; }
bool hasAlternativeNZCV() const { return HasAlternativeNZCV; }
bool hasFRInt3264() const { return HasFRInt3264; }
bool hasSpecRestrict() const { return HasSpecRestrict; }
bool hasSSBS() const { return HasSSBS; }
bool hasSB() const { return HasSB; }
bool hasPredRes() const { return HasPredRes; }
bool hasCCDP() const { return HasCCDP; }
bool hasBTI() const { return HasBTI; }
bool hasRandGen() const { return HasRandGen; }
bool hasMTE() const { return HasMTE; }
bool hasTME() const { return HasTME; }
bool hasPAUTH() const { return HasPAUTH; }
// Arm SVE2 extensions
bool hasSVE2AES() const { return HasSVE2AES; }
bool hasSVE2SM4() const { return HasSVE2SM4; }
bool hasSVE2SHA3() const { return HasSVE2SHA3; }
bool hasSVE2BitPerm() const { return HasSVE2BitPerm; }
bool hasMatMulInt8() const { return HasMatMulInt8; }
bool hasMatMulFP32() const { return HasMatMulFP32; }
bool hasMatMulFP64() const { return HasMatMulFP64; }
// Armv8.6-A Extensions
bool hasBF16() const { return HasBF16; }
bool hasFineGrainedTraps() const { return HasFineGrainedTraps; }
bool hasEnhancedCounterVirtualization() const {
  return HasEnhancedCounterVirtualization;
}
// Arm Scalable Matrix Extension (SME)
bool hasSME() const { return HasSME; }
bool hasSMEF64() const { return HasSMEF64; }
bool hasSMEI64() const { return HasSMEI64; }
bool isLittleEndian() const { return IsLittle; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetILP32() const {
  return TargetTriple.isArch32Bit() ||
         TargetTriple.getEnvironment() == Triple::GNUILP32;
}
bool useAA() const override;
bool outlineAtomics() const { return OutlineAtomics; }
bool hasVH() const { return HasVH; }
bool hasPAN() const { return HasPAN; }
bool hasLOR() const { return HasLOR; }
bool hasPsUAO() const { return HasPsUAO; }
bool hasPAN_RWV() const { return HasPAN_RWV; }
bool hasCCPP() const { return HasCCPP; }
bool hasPAuth() const { return HasPAuth; }
bool hasJS() const { return HasJS; }
bool hasCCIDX() const { return HasCCIDX; }
bool hasComplxNum() const { return HasComplxNum; }
bool hasNV() const { return HasNV; }
bool hasMPAM() const { return HasMPAM; }
bool hasDIT() const { return HasDIT; }
bool hasTRACEV8_4() const { return HasTRACEV8_4; }
bool hasAM() const { return HasAM; }
bool hasAMVS() const { return HasAMVS; }
bool hasXS() const { return HasXS; }
bool hasWFxT() const { return HasWFxT; }
bool hasHCX() const { return HasHCX; }
bool hasLS64() const { return HasLS64; }
bool hasSEL2() const { return HasSEL2; }
bool hasPMU() const { return HasPMU; }
bool hasTLB_RMI() const { return HasTLB_RMI; }
bool hasFlagM() const { return HasFlagM; }
bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
bool addrSinkUsingGEPs() const override {
  // Keeping GEPs inbounds is important for exploiting AArch64
  // addressing-modes in ILP32 mode.
  return useAA() || isTargetILP32();
}
bool useSmallAddressing() const {
  switch (TLInfo.getTargetMachine().getCodeModel()) {
  case CodeModel::Kernel:
    // Kernel is currently allowed only for Fuchsia targets,
    // where it is the same as Small for almost all purposes.
  case CodeModel::Small:
    return true;
  default:
    return false;
  }
}
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
unsigned ClassifyGlobalReference(const GlobalValue *GV,
                                 const TargetMachine &TM) const;
unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
                                         const TargetMachine &TM) const;
void overrideSchedPolicy(MachineSchedPolicy &Policy,
                         unsigned NumRegionInstrs) const override;
bool enableEarlyIfConversion() const override;
bool enableAdvancedRASplitCost() const override { return false; }
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
bool isCallingConvWin64(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Swift:
    return isTargetWindows();
  case CallingConv::Win64:
    return true;
  default:
    return false;
  }
}
void mirFileLoaded(MachineFunction &MF) const override;
// Return the known range for the bit length of SVE data registers. A value
// of 0 means nothing is known about that particular limit beyond what's
// implied by the architecture.
unsigned getMaxSVEVectorSizeInBits() const {
  assert(HasSVE && "Tried to get SVE vector length without SVE support!");
  return MaxSVEVectorSizeInBits;
}
unsigned getMinSVEVectorSizeInBits() const {
  assert(HasSVE && "Tried to get SVE vector length without SVE support!");
  return MinSVEVectorSizeInBits;
}
bool useSVEForFixedLengthVectors() const;
};
} // End llvm namespace
#endif
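
As a usage note (illustrative, not part of the file above), backend passes
normally reach this subtarget through the machine function; a minimal sketch,
assuming a hypothetical helper name and an arbitrary 256-bit threshold:

    #include "AArch64Subtarget.h"
    #include "llvm/CodeGen/MachineFunction.h"

    // Illustrative only: query the AArch64 subtarget from a MachineFunction.
    static bool hasKnown256BitSVE(const llvm::MachineFunction &MF) {
      const auto &ST = MF.getSubtarget<llvm::AArch64Subtarget>();
      // getMinSVEVectorSizeInBits() asserts SVE support, so check hasSVE()
      // first; 0 means no lower bound is known beyond the architecture.
      return ST.hasSVE() && ST.getMinSVEVectorSizeInBits() >= 256;
    }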