1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[X86] Filter out tuning feature flags and a few ISA feature flags when checking for function inline compatibility.

Tuning flags don't have any effect on the available instructions so aren't a good reason to prevent inlining.

There are also some ISA flags that don't have any intrinsics or ABI requirements that we can exclude. I've put only the most basic ones like cmpxchg16b and lahfsahf. These are interesting because they aren't present in all 64-bit CPUs, but we have codegen workarounds when they aren't present.

Loosening these checks can help with scenarios where a caller has a more specific CPU than a callee. The default tuning flags on our generic 'x86-64' CPU can currently make it inline incompatible with other CPUs. I've also added an example test for 'nocona' and 'prescott' where 'nocona' is just a 64-bit capable version of 'prescott' but in 32-bit mode they should be completely compatible.

I've based the implementation here on the similar code in AMDGPU.

Differential Revision: https://reviews.llvm.org/D58371

llvm-svn: 354355
This commit is contained in:
Craig Topper 2019-02-19 17:05:11 +00:00
parent 0636313e2d
commit b1993ab24f
4 changed files with 115 additions and 4 deletions

View File

@ -3065,10 +3065,9 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
const FeatureBitset &CalleeBits =
TM.getSubtargetImpl(*Callee)->getFeatureBits();
// FIXME: This is likely too limiting as it will include subtarget features
// that we might not care about for inlining, but it is conservatively
// correct.
return (CallerBits & CalleeBits) == CalleeBits;
FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList;
FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList;
return (RealCallerBits & RealCalleeBits) == RealCalleeBits;
}
const X86TTIImpl::TTI::MemCmpExpansionOptions *

View File

@ -35,6 +35,60 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
// Accessor for the ST member (the X86 subtarget pointer this object holds).
const X86Subtarget *getST() const { return ST; }
// Accessor for the TLI member (the X86 target-lowering pointer this object
// holds).
const X86TargetLowering *getTLI() const { return TLI; }
// Subtarget feature bits that are disregarded when deciding whether a callee
// may be inlined into a caller. areInlineCompatible() clears these bits from
// both the caller's and the callee's feature sets and then requires the
// callee's remaining features to be a subset of the caller's.
//
// The entries are tuning/codegen-control flags (which do not change the set
// of available instructions) plus a few basic ISA flags that have no
// intrinsics or ABI impact, so a mismatch in any of them is not a reason to
// block inlining.
const FeatureBitset InlineFeatureIgnoreList = {
// This indicates the CPU is 64 bit capable not that we are in 64-bit mode.
X86::Feature64Bit,
// These features don't have any intrinsics or ABI effect.
X86::FeatureNOPL,
X86::FeatureCMPXCHG16B,
X86::FeatureLAHFSAHF,
// Codegen control options.
X86::FeatureFast11ByteNOP,
X86::FeatureFast15ByteNOP,
X86::FeatureFastBEXTR,
X86::FeatureFastHorizontalOps,
X86::FeatureFastLZCNT,
X86::FeatureFastPartialYMMorZMMWrite,
X86::FeatureFastScalarFSQRT,
X86::FeatureFastSHLDRotate,
X86::FeatureFastVariableShuffle,
X86::FeatureFastVectorFSQRT,
X86::FeatureLEAForSP,
X86::FeatureLEAUsesAG,
X86::FeatureLZCNTFalseDeps,
X86::FeatureMacroFusion,
X86::FeatureMergeToThreeWayBranch,
X86::FeaturePadShortFunctions,
X86::FeaturePOPCNTFalseDeps,
X86::FeatureSSEUnalignedMem,
X86::FeatureSlow3OpsLEA,
X86::FeatureSlowDivide32,
X86::FeatureSlowDivide64,
X86::FeatureSlowIncDec,
X86::FeatureSlowLEA,
X86::FeatureSlowPMADDWD,
X86::FeatureSlowPMULLD,
X86::FeatureSlowSHLD,
X86::FeatureSlowTwoMemOps,
X86::FeatureSlowUAMem16,
// Perf-tuning flags.
X86::FeatureHasFastGather,
X86::FeatureSlowUAMem32,
// Based on whether user set the -mprefer-vector-width command line.
X86::FeaturePrefer256Bit,
// CPU name enums. These just follow CPU string.
X86::ProcIntelAtom,
X86::ProcIntelGLM,
X86::ProcIntelGLP,
X86::ProcIntelSLM,
X86::ProcIntelTRM,
};
public:
explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),

View File

@ -0,0 +1,15 @@
; RUN: opt < %s -mtriple=i686-unknown-unknown -S -inline | FileCheck %s
; Inlining test for a 32-bit (i686) triple: a caller built for "prescott"
; calls a callee built for "nocona". The FileCheck directives below expect the
; call to be inlined, leaving only `ret i32 0` in the caller.
; Callee: returns 0; uses attribute group #0 (target-cpu "nocona").
define i32 @func_target_cpu_nocona() #0 {
ret i32 0
}
; Caller: uses attribute group #1 (target-cpu "prescott").
; CHECK-LABEL: @target_cpu_prescott_call_target_cpu_nocona(
; CHECK-NEXT: ret i32 0
define i32 @target_cpu_prescott_call_target_cpu_nocona() #1 {
%call = call i32 @func_target_cpu_nocona()
ret i32 %call
}
; Attribute groups selecting the target CPU of the callee (#0) and caller (#1).
attributes #0 = { nounwind "target-cpu"="nocona" }
attributes #1 = { nounwind "target-cpu"="prescott" }

View File

@ -0,0 +1,43 @@
; RUN: opt < %s -mtriple=x86_64-unknown-unknown -S -inline | FileCheck %s
; Inlining tests for a 64-bit (x86_64) triple. Each caller below is expected
; by the FileCheck directives to have its call inlined, leaving only
; `ret i32 0` in the caller body.
; Callee built for the generic "x86-64" CPU (attribute group #0).
define i32 @func_target_cpu_base() #0 {
ret i32 0
}
; Caller built for "k8" (#1) calling the generic "x86-64" callee.
; CHECK-LABEL: @target_cpu_k8_call_target_cpu_base(
; CHECK-NEXT: ret i32 0
define i32 @target_cpu_k8_call_target_cpu_base() #1 {
%call = call i32 @func_target_cpu_base()
ret i32 %call
}
; Caller built for "nehalem" (#2) calling the generic "x86-64" callee.
; CHECK-LABEL: @target_cpu_target_nehalem_call_target_cpu_base(
; CHECK-NEXT: ret i32 0
define i32 @target_cpu_target_nehalem_call_target_cpu_base() #2 {
%call = call i32 @func_target_cpu_base()
ret i32 %call
}
; Caller built for "goldmont" (#3) calling the generic "x86-64" callee.
; CHECK-LABEL: @target_cpu_target_goldmont_call_target_cpu_base(
; CHECK-NEXT: ret i32 0
define i32 @target_cpu_target_goldmont_call_target_cpu_base() #3 {
%call = call i32 @func_target_cpu_base()
ret i32 %call
}
; Callee built for "nocona" with the sse3 feature disabled (attribute group #4).
; NOTE(review): presumably "-sse3" is there so the callee's ISA features are a
; subset of the generic "x86-64" caller's - confirm against the CPU definitions.
define i32 @func_target_cpu_nocona() #4 {
ret i32 0
}
; Caller built for the generic "x86-64" CPU (#0) calling the "nocona" callee.
; CHECK-LABEL: @target_cpu_target_base_call_target_cpu_nocona(
; CHECK-NEXT: ret i32 0
define i32 @target_cpu_target_base_call_target_cpu_nocona() #0 {
%call = call i32 @func_target_cpu_nocona()
ret i32 %call
}
; Attribute groups selecting the target CPU (and feature overrides) used above.
attributes #0 = { nounwind "target-cpu"="x86-64" }
attributes #1 = { nounwind "target-cpu"="k8" }
attributes #2 = { nounwind "target-cpu"="nehalem" }
attributes #3 = { nounwind "target-cpu"="goldmont" }
attributes #4 = { nounwind "target-cpu"="nocona" "target-features"="-sse3" }