[ARM] Do not test for CPUs, use SubtargetFeatures (Part 2). NFCI
This is a follow-up for r273544. The end goal is to get rid of the isSwift / isCortexXY / isWhatever methods. Since the ARM backend seems to have quite a lot of calls to these methods, I intend to submit 5-6 subtarget features at a time, instead of one big lump.

Differential Revision: http://reviews.llvm.org/D21685

llvm-svn: 273853
This commit is contained in:
parent 20041add8b
commit d9b9f9bf95
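For readers outside the ARM backend, the shape of the change is: a pass used to ask "is this CPU one of the N models I know about?" and now asks "does this CPU declare the property I care about?". A minimal standalone sketch of that pattern follows (the Subtarget struct below is invented for illustration and is far simpler than the real ARMSubtarget):

// Sketch only: simplified stand-in for ARMSubtarget, not the real class.
#include <cassert>

struct Subtarget {
  // Feature bits; in LLVM these are set from the SubtargetFeature
  // definitions added in ARM.td below.
  bool CheckVLDnAlign = false;
  bool NonpipelinedVFP = false;
  bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
  bool nonpipelinedVFP() const { return NonpipelinedVFP; }
};

// Before this patch a check like this would have been spelled
// "Subtarget.isLikeA9()"; now only the relevant property is consulted,
// so a future CPU gets the tuning by setting one feature bit.
unsigned vldnAlignPenalty(const Subtarget &ST, unsigned DefAlign) {
  return (DefAlign < 8 && ST.checkVLDnAccessAlignment()) ? 1 : 0;
}

int main() {
  Subtarget A9;
  A9.CheckVLDnAlign = true; // Cortex-A9 enables +vldn-align below.
  assert(vldnAlignPenalty(A9, 4) == 1);
  assert(vldnAlignPenalty(A9, 8) == 0);
  return 0;
}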
lib/Target/ARM/ARM.td

@@ -144,6 +144,17 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
                                         "true",
                                         "Use NEON for single precision FP">;
 
+// On some processors, VLDn instructions that access unaligned data take one
+// extra cycle. Take that into account when computing operand latencies.
+def FeatureCheckVLDnAlign : SubtargetFeature<"vldn-align", "CheckVLDnAlign",
+                                             "true",
+                                             "Check for VLDn unaligned access">;
+
+// Some processors have a nonpipelined VFP coprocessor.
+def FeatureNonpipelinedVFP : SubtargetFeature<"nonpipelined-vfp",
+                                              "NonpipelinedVFP", "true",
+                                              "VFP instructions are not pipelined">;
+
 // Some processors have FP multiply-accumulate instructions that don't
 // play nicely with other VFP / NEON instructions, and it's generally better
 // to just not use them.
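A note on the TableGen encoding, for reviewers less familiar with it: in SubtargetFeature<"vldn-align", "CheckVLDnAlign", "true", ...> the first argument is the user-visible feature key, the second is the ARMSubtarget member it initializes, and the third is the value it is set to. So the new tuning bits should also be reachable independently of -mcpu, e.g. (illustrative invocation, not from the patch; foo.ll is a placeholder input):

llc -mtriple=armv7-- -mattr=+vldn-align,+nonpipelined-vfp foo.ll

which would let a CPU outside the ProcessorModel lists below opt into the same tuning.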
@@ -552,6 +563,7 @@ def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
 
 def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
                                                   FeatureHasRetAddrStack,
+                                                  FeatureNonpipelinedVFP,
                                                   FeatureTrustZone,
                                                   FeatureSlowFPBrcc,
                                                   FeatureHasSlowFPVMLx,
@@ -567,6 +579,7 @@ def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
                                                   FeatureAvoidPartialCPSR,
                                                   FeaturePreferVMOVSR,
                                                   FeatureNEONForFPMovs,
+                                                  FeatureCheckVLDnAlign,
                                                   FeatureMP]>;
 
 // FIXME: A12 has currently the same Schedule model as A9
@@ -589,6 +602,7 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
                                                   FeatureT2XtPk,
                                                   FeatureVFP4,
                                                   FeatureMP,
+                                                  FeatureCheckVLDnAlign,
                                                   FeatureHWDiv,
                                                   FeatureHWDivARM,
                                                   FeatureAvoidPartialCPSR,
@@ -612,6 +626,7 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
 // division features.
 def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
                                               FeatureHasRetAddrStack,
+                                              FeatureCheckVLDnAlign,
                                               FeatureVMLxForwarding,
                                               FeatureT2XtPk,
                                               FeatureFP16,
lib/Target/ARM/ARMBaseInstrInfo.cpp

@@ -3024,6 +3024,45 @@ unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
   return Size / 4;
 }
 
+static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
+                                                    unsigned NumRegs) {
+  unsigned UOps = 1 + NumRegs; // 1 for address computation.
+  switch (Opc) {
+  default:
+    break;
+  case ARM::VLDMDIA_UPD:
+  case ARM::VLDMDDB_UPD:
+  case ARM::VLDMSIA_UPD:
+  case ARM::VLDMSDB_UPD:
+  case ARM::VSTMDIA_UPD:
+  case ARM::VSTMDDB_UPD:
+  case ARM::VSTMSIA_UPD:
+  case ARM::VSTMSDB_UPD:
+  case ARM::LDMIA_UPD:
+  case ARM::LDMDA_UPD:
+  case ARM::LDMDB_UPD:
+  case ARM::LDMIB_UPD:
+  case ARM::STMIA_UPD:
+  case ARM::STMDA_UPD:
+  case ARM::STMDB_UPD:
+  case ARM::STMIB_UPD:
+  case ARM::tLDMIA_UPD:
+  case ARM::tSTMIA_UPD:
+  case ARM::t2LDMIA_UPD:
+  case ARM::t2LDMDB_UPD:
+  case ARM::t2STMIA_UPD:
+  case ARM::t2STMDB_UPD:
+    ++UOps; // One for base register writeback.
+    break;
+  case ARM::LDMIA_RET:
+  case ARM::tPOP_RET:
+  case ARM::t2LDMIA_RET:
+    UOps += 2; // One for base reg wb, one for write to pc.
+    break;
+  }
+  return UOps;
+}
+
 unsigned
 ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                  const MachineInstr *MI) const {
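Worked numbers for the helper above, restated standalone (this mirrors, but is not, the LLVM function): every load/store-multiple costs one micro-op per register plus one for address computation; the _UPD forms add one for base-register writeback; the return forms add writeback plus a write to pc.

// Sketch: simplified re-statement of the micro-op counting above.
#include <cassert>

enum OpKind { PlainLdStM, LdStMWriteback, LdStMReturn };

unsigned numMicroOps(OpKind K, unsigned NumRegs) {
  unsigned UOps = 1 + NumRegs;        // 1 for address computation.
  if (K == LdStMWriteback)
    UOps += 1;                        // base register writeback
  else if (K == LdStMReturn)
    UOps += 2;                        // writeback + write to pc
  return UOps;
}

int main() {
  assert(numMicroOps(PlainLdStM, 4) == 5);     // e.g. a 4-register VLDM
  assert(numMicroOps(LdStMWriteback, 4) == 6); // e.g. LDMIA_UPD, 4 regs
  assert(numMicroOps(LdStMReturn, 3) == 6);    // e.g. tPOP_RET, 3 regs
  return 0;
}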
@@ -3107,65 +3146,35 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
   case ARM::t2STMIA_UPD:
   case ARM::t2STMDB_UPD: {
     unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
-    if (Subtarget.isSwift()) {
-      int UOps = 1 + NumRegs; // One for address computation, one for each ld / st.
-      switch (Opc) {
-      default: break;
-      case ARM::VLDMDIA_UPD:
-      case ARM::VLDMDDB_UPD:
-      case ARM::VLDMSIA_UPD:
-      case ARM::VLDMSDB_UPD:
-      case ARM::VSTMDIA_UPD:
-      case ARM::VSTMDDB_UPD:
-      case ARM::VSTMSIA_UPD:
-      case ARM::VSTMSDB_UPD:
-      case ARM::LDMIA_UPD:
-      case ARM::LDMDA_UPD:
-      case ARM::LDMDB_UPD:
-      case ARM::LDMIB_UPD:
-      case ARM::STMIA_UPD:
-      case ARM::STMDA_UPD:
-      case ARM::STMDB_UPD:
-      case ARM::STMIB_UPD:
-      case ARM::tLDMIA_UPD:
-      case ARM::tSTMIA_UPD:
-      case ARM::t2LDMIA_UPD:
-      case ARM::t2LDMDB_UPD:
-      case ARM::t2STMIA_UPD:
-      case ARM::t2STMDB_UPD:
-        ++UOps; // One for base register writeback.
-        break;
-      case ARM::LDMIA_RET:
-      case ARM::tPOP_RET:
-      case ARM::t2LDMIA_RET:
-        UOps += 2; // One for base reg wb, one for write to pc.
-        break;
-      }
-      return UOps;
-    } else if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {
+    switch (Subtarget.getLdStMultipleTiming()) {
+    case ARMSubtarget::SingleIssuePlusExtras:
+      return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);
+    case ARMSubtarget::SingleIssue:
+      // Assume the worst.
+      return NumRegs;
+    case ARMSubtarget::DoubleIssue: {
       if (NumRegs < 4)
         return 2;
       // 4 registers would be issued: 2, 2.
       // 5 registers would be issued: 2, 2, 1.
-      int A8UOps = (NumRegs / 2);
+      unsigned UOps = (NumRegs / 2);
       if (NumRegs % 2)
-        ++A8UOps;
-      return A8UOps;
-    } else if (Subtarget.isLikeA9()) {
-      int A9UOps = (NumRegs / 2);
+        ++UOps;
+      return UOps;
+    }
+    case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {
+      unsigned UOps = (NumRegs / 2);
       // If there are odd number of registers or if it's not 64-bit aligned,
       // then it takes an extra AGU (Address Generation Unit) cycle.
-      if ((NumRegs % 2) ||
-          !MI->hasOneMemOperand() ||
+      if ((NumRegs % 2) || !MI->hasOneMemOperand() ||
           (*MI->memoperands_begin())->getAlignment() < 8)
-        ++A9UOps;
-      return A9UOps;
-    } else {
-      // Assume the worst.
-      return NumRegs;
+        ++UOps;
+      return UOps;
+    }
     }
   }
   }
   llvm_unreachable("Didn't find the number of microops");
 }
 
 int
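To make the four timing models concrete, here is a condensed standalone mirror of the switch above with worked values (the Aligned64 flag stands in for the memoperand alignment test, and SingleIssuePlusExtras is collapsed to its base cost, without the per-opcode writeback extras):

// Sketch only: simplified mirror of the getLdStMultipleTiming switch.
#include <cassert>

enum Timing { DoubleIssue, DoubleIssueCheckUnalignedAccess,
              SingleIssue, SingleIssuePlusExtras };

unsigned ldmUOps(Timing T, unsigned NumRegs, bool Aligned64) {
  switch (T) {
  case SingleIssue:
    return NumRegs;              // assume the worst
  case SingleIssuePlusExtras:
    return 1 + NumRegs;          // + address computation (writeback and
                                 // pc-write extras omitted in this sketch)
  case DoubleIssue: {
    if (NumRegs < 4)
      return 2;
    unsigned UOps = NumRegs / 2; // two registers per issue slot
    if (NumRegs % 2)
      ++UOps;                    // 5 registers issue as 2, 2, 1
    return UOps;
  }
  case DoubleIssueCheckUnalignedAccess: {
    unsigned UOps = NumRegs / 2;
    if ((NumRegs % 2) || !Aligned64)
      ++UOps;                    // extra AGU cycle
    return UOps;
  }
  }
  return NumRegs;
}

int main() {
  assert(ldmUOps(DoubleIssue, 8, true) == 4);
  assert(ldmUOps(SingleIssue, 8, true) == 8);
  assert(ldmUOps(DoubleIssueCheckUnalignedAccess, 4, true) == 2);
  assert(ldmUOps(DoubleIssueCheckUnalignedAccess, 4, false) == 3);
  return 0;
}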
@@ -3542,7 +3551,7 @@ static int adjustDefLatency(const ARMSubtarget &Subtarget,
     }
   }
 
-  if (DefAlign < 8 && Subtarget.isLikeA9()) {
+  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {
     switch (DefMCID->getOpcode()) {
     default: break;
     case ARM::VLD1q8:
@@ -3767,10 +3776,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
 
   if (!UseNode->isMachineOpcode()) {
     int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
-    if (Subtarget.isLikeA9() || Subtarget.isSwift())
-      return Latency <= 2 ? 1 : Latency - 1;
-    else
-      return Latency <= 3 ? 1 : Latency - 2;
+    int Adj = Subtarget.getPreISelOperandLatencyAdjustment();
+    int Threshold = 1 + Adj;
+    return Latency <= Threshold ? 1 : Latency - Adj;
   }
 
   const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
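The refactored expression preserves the old behavior: with PreISelOperandLatencyAdjustment = 1 (the value the A9-like families and Swift get below) the threshold is 2, reproducing the removed "Latency <= 2 ? 1 : Latency - 1"; with the default of 2 the threshold is 3, reproducing "Latency <= 3 ? 1 : Latency - 2". A quick standalone check of the arithmetic:

// Sketch: the adjusted-latency formula with both Adj settings.
#include <cassert>

int adjustedLatency(int Latency, int Adj) {
  int Threshold = 1 + Adj;
  return Latency <= Threshold ? 1 : Latency - Adj;
}

int main() {
  // Adj = 1: matches the old Latency <= 2 ? 1 : Latency - 1.
  assert(adjustedLatency(2, 1) == 1 && adjustedLatency(5, 1) == 4);
  // Adj = 2 (default): matches the old Latency <= 3 ? 1 : Latency - 2.
  assert(adjustedLatency(3, 2) == 1 && adjustedLatency(5, 2) == 3);
  return 0;
}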
@@ -3841,7 +3849,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     }
   }
 
-  if (DefAlign < 8 && Subtarget.isLikeA9())
+  if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
     switch (DefMCID.getOpcode()) {
     default: break;
     case ARM::VLD1q8:
@@ -4060,9 +4068,8 @@ hasHighOperandLatency(const TargetSchedModel &SchedModel,
                       const MachineInstr *UseMI, unsigned UseIdx) const {
   unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
   unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
-  if (Subtarget.isCortexA8() &&
+  if (Subtarget.nonpipelinedVFP() &&
       (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
-    // CortexA8 VFP instructions are not pipelined.
     return true;
 
   // Hoist VFP / NEON instructions with 4 or higher latency.
lib/Target/ARM/ARMSubtarget.cpp

@@ -109,10 +109,13 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
       Has8MSecExt(false), HasCrypto(false), HasCRC(false), HasRAS(false),
       HasZeroCycleZeroing(false), IsProfitableToUnpredicate(false),
       HasSlowVGETLNi32(false), HasSlowVDUP32(false), PreferVMOVSR(false),
-      PreferISHST(false), UseNEONForFPMovs(false), StrictAlign(false),
-      RestrictIT(false), HasDSP(false), UseNaClTrap(false), GenLongCalls(false),
+      PreferISHST(false), UseNEONForFPMovs(false), CheckVLDnAlign(false),
+      NonpipelinedVFP(false), StrictAlign(false), RestrictIT(false),
+      HasDSP(false), UseNaClTrap(false), GenLongCalls(false),
       UnsafeFPMath(false), UseSjLjEH(false), stackAlignment(4), CPUString(CPU),
-      IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
+      MaxInterleaveFactor(1), LdStMultipleTiming(SingleIssue),
+      PreISelOperandLatencyAdjustment(2), IsLittle(IsLittle), TargetTriple(TT),
+      Options(TM.Options), TM(TM),
       FrameLowering(initializeFrameLowering(CPU, FS)),
       // At this point initializeSubtargetDependencies has been called so
       // we can query directly.
@@ -221,6 +224,51 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters
       (Options.UnsafeFPMath || isTargetDarwin()))
     UseNEONForSinglePrecisionFP = true;
+
+  // FIXME: Teach TableGen to deal with these instead of doing it manually here.
+  switch (ARMProcFamily) {
+  case Others:
+  case CortexA5:
+    break;
+  case CortexA7:
+    LdStMultipleTiming = DoubleIssue;
+    break;
+  case CortexA8:
+    LdStMultipleTiming = DoubleIssue;
+    break;
+  case CortexA9:
+    LdStMultipleTiming = DoubleIssueCheckUnalignedAccess;
+    PreISelOperandLatencyAdjustment = 1;
+    break;
+  case CortexA12:
+    break;
+  case CortexA15:
+    MaxInterleaveFactor = 2;
+    PreISelOperandLatencyAdjustment = 1;
+    break;
+  case CortexA17:
+  case CortexA32:
+  case CortexA35:
+  case CortexA53:
+  case CortexA57:
+  case CortexA72:
+  case CortexA73:
+  case CortexR4:
+  case CortexR4F:
+  case CortexR5:
+  case CortexR7:
+  case CortexM3:
+  case ExynosM1:
+    break;
+  case Krait:
+    PreISelOperandLatencyAdjustment = 1;
+    break;
+  case Swift:
+    MaxInterleaveFactor = 2;
+    LdStMultipleTiming = SingleIssuePlusExtras;
+    PreISelOperandLatencyAdjustment = 1;
+    break;
+  }
 }
 
 bool ARMSubtarget::isAPCS_ABI() const {
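The structure here is: conservative defaults in the constructor above (LdStMultipleTiming = SingleIssue, MaxInterleaveFactor = 1, PreISelOperandLatencyAdjustment = 2), then per-family overrides in this switch until TableGen can emit them (per the FIXME). A condensed standalone mirror of that shape, covering two of the families above (names abbreviated; not the real ARMSubtarget):

// Sketch: defaults-plus-overrides pattern from initSubtargetFeatures.
#include <cassert>

enum Timing { DoubleIssue, DoubleIssueCheckUnalignedAccess,
              SingleIssue, SingleIssuePlusExtras };
enum Family { Others, CortexA9, Swift };

struct Tuning {
  // Defaults mirror the constructor's initializer list: assume the
  // worst until a family opts in to something better.
  unsigned MaxInterleaveFactor = 1;
  Timing LdStMultipleTiming = SingleIssue;
  int PreISelOperandLatencyAdjustment = 2;
};

Tuning tuningFor(Family F) {
  Tuning T;
  switch (F) {
  case Others:
    break;
  case CortexA9:
    T.LdStMultipleTiming = DoubleIssueCheckUnalignedAccess;
    T.PreISelOperandLatencyAdjustment = 1;
    break;
  case Swift:
    T.MaxInterleaveFactor = 2;
    T.LdStMultipleTiming = SingleIssuePlusExtras;
    T.PreISelOperandLatencyAdjustment = 1;
    break;
  }
  return T;
}

int main() {
  assert(tuningFor(Swift).LdStMultipleTiming == SingleIssuePlusExtras);
  assert(tuningFor(Others).LdStMultipleTiming == SingleIssue); // worst case
  return 0;
}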
lib/Target/ARM/ARMSubtarget.h

@@ -56,6 +56,22 @@ protected:
     ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline
   };
 
+public:
+  /// What kind of timing do load multiple/store multiple instructions have.
+  enum ARMLdStMultipleTiming {
+    /// Can load/store 2 registers/cycle.
+    DoubleIssue,
+    /// Can load/store 2 registers/cycle, but needs an extra cycle if the access
+    /// is not 64-bit aligned.
+    DoubleIssueCheckUnalignedAccess,
+    /// Can load/store 1 register/cycle.
+    SingleIssue,
+    /// Can load/store 1 register/cycle, but needs an extra cycle for address
+    /// computation and potentially also for register writeback.
+    SingleIssuePlusExtras,
+  };
+
+protected:
   /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
   ARMProcFamilyEnum ARMProcFamily;
@@ -236,6 +252,12 @@ protected:
   /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
   bool UseNEONForFPMovs;
 
+  /// If true, VLDn instructions take an extra cycle for unaligned accesses.
+  bool CheckVLDnAlign;
+
+  /// If true, VFP instructions are not pipelined.
+  bool NonpipelinedVFP;
+
   /// StrictAlign - If true, the subtarget disallows unaligned memory
   /// accesses for some types. For details, see
   /// ARMTargetLowering::allowsMisalignedMemoryAccesses().
@@ -268,6 +290,16 @@ protected:
   /// CPUString - String name of used CPU.
   std::string CPUString;
 
+  unsigned MaxInterleaveFactor;
+
+  /// What kind of timing do load multiple/store multiple have (double issue,
+  /// single issue etc).
+  ARMLdStMultipleTiming LdStMultipleTiming;
+
+  /// The adjustment that we need to apply to get the operand latency from the
+  /// operand cycle returned by the itinerary data for pre-ISel operands.
+  int PreISelOperandLatencyAdjustment;
+
   /// IsLittle - The target is Little Endian
   bool IsLittle;
@@ -400,6 +432,8 @@ public:
   bool preferVMOVSR() const { return PreferVMOVSR; }
   bool preferISHSTBarriers() const { return PreferISHST; }
   bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
+  bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
+  bool nonpipelinedVFP() const { return NonpipelinedVFP; }
   bool prefers32BitThumb() const { return Pref32BitThumb; }
   bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
   bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
@@ -538,6 +572,16 @@ public:
   /// function for this subtarget.
   unsigned getStackAlignment() const { return stackAlignment; }
 
+  unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
+
+  ARMLdStMultipleTiming getLdStMultipleTiming() const {
+    return LdStMultipleTiming;
+  }
+
+  int getPreISelOperandLatencyAdjustment() const {
+    return PreISelOperandLatencyAdjustment;
+  }
+
   /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect
   /// symbol.
   bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
lib/Target/ARM/ARMTargetTransformInfo.h

@@ -97,10 +97,7 @@ public:
   }
 
   unsigned getMaxInterleaveFactor(unsigned VF) {
-    // These are out of order CPUs:
-    if (ST->isCortexA15() || ST->isSwift())
-      return 2;
-    return 1;
+    return ST->getMaxInterleaveFactor();
   }
 
   int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);