1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[X86] Provide a separate feature bit for macro fusion support instead of basing it on the AVX flag

Summary:
Currently we determine if macro fusion is supported based on the AVX flag as a proxy for the processor being "Sandy Bridge".

This is really strange now that AMD processors also support AVX. It also means that if the user explicitly disables AVX, we disable macro fusion.

This patch adds an explicit macro fusion feature. I've also enabled it for the generic 64-bit CPU (which doesn't have AVX).

This is probably another candidate for being in the MI layer, but for now I at least wanted to correct the overloading of the AVX feature.

Reviewers: spatel, chandlerc, RKSimon, zvi

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D37280

llvm-svn: 312097
This commit is contained in:
Craig Topper 2017-08-30 04:34:48 +00:00
parent c0412ae0a3
commit 0b47d4e59e
9 changed files with 43 additions and 22 deletions

View File

@ -288,6 +288,13 @@ def FeatureERMSB
"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;
// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
def FeatureMacroFusion
: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
"Various instructions can be fused with conditional branches">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
@ -372,7 +379,8 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeatureLAHFSAHF
FeatureLAHFSAHF,
FeatureMacroFusion
]>;
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureX87,
@ -382,7 +390,8 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeatureLAHFSAHF
FeatureLAHFSAHF,
FeatureMacroFusion
]>;
// Atom CPUs.
@ -468,7 +477,8 @@ class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureCMPXCHG16B,
FeatureSlowBTMem,
FeaturePOPCNT,
FeatureLAHFSAHF
FeatureLAHFSAHF,
FeatureMacroFusion
]>;
def : NehalemProc<"nehalem">;
def : NehalemProc<"corei7">;
@ -485,7 +495,8 @@ class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
FeatureLAHFSAHF
FeatureLAHFSAHF,
FeatureMacroFusion
]>;
def : WestmereProc<"westmere">;
@ -516,7 +527,8 @@ def SNBFeatures : ProcessorFeatures<[], [
FeatureLAHFSAHF,
FeatureSlow3OpsLEA,
FeatureFastScalarFSQRT,
FeatureFastSHLDRotate
FeatureFastSHLDRotate,
FeatureMacroFusion
]>;
class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
@ -731,7 +743,8 @@ def : Proc<"bdver1", [
FeatureXSAVE,
FeatureLWP,
FeatureSlowSHLD,
FeatureLAHFSAHF
FeatureLAHFSAHF,
FeatureMacroFusion
]>;
// Piledriver
def : Proc<"bdver2", [
@ -755,7 +768,8 @@ def : Proc<"bdver2", [
FeatureLWP,
FeatureFMA,
FeatureSlowSHLD,
FeatureLAHFSAHF
FeatureLAHFSAHF,
FeatureMacroFusion
]>;
// Steamroller
@ -782,7 +796,8 @@ def : Proc<"bdver3", [
FeatureXSAVEOPT,
FeatureSlowSHLD,
FeatureFSGSBase,
FeatureLAHFSAHF
FeatureLAHFSAHF,
FeatureMacroFusion
]>;
// Excavator
@ -810,7 +825,8 @@ def : Proc<"bdver4", [
FeatureSlowSHLD,
FeatureFSGSBase,
FeatureLAHFSAHF,
FeatureMWAITX
FeatureMWAITX,
FeatureMacroFusion
]>;
// Znver1
@ -830,6 +846,7 @@ def: ProcessorModel<"znver1", Znver1Model, [
FeatureFastLZCNT,
FeatureLAHFSAHF,
FeatureLZCNT,
FeatureMacroFusion,
FeatureMMX,
FeatureMOVBE,
FeatureMWAITX,
@ -873,7 +890,8 @@ def : ProcessorModel<"x86-64", SandyBridgeModel, [
Feature64Bit,
FeatureSlow3OpsLEA,
FeatureSlowBTMem,
FeatureSlowIncDec
FeatureSlowIncDec,
FeatureMacroFusion
]>;
//===----------------------------------------------------------------------===//

View File

@ -27,10 +27,8 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI);
// Check if this processor supports macro-fusion. Since this is a minor
// heuristic, we haven't specifically reserved a feature. hasAVX is a decent
// proxy for SandyBridge+.
if (!ST.hasAVX())
// Check if this processor supports macro-fusion.
if (!ST.hasMacroFusion())
return false;
enum {

View File

@ -347,6 +347,7 @@ void X86Subtarget::initializeEnvironment() {
HasFastVectorFSQRT = false;
HasFastLZCNT = false;
HasFastSHLDRotate = false;
HasMacroFusion = false;
HasERMSB = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;

View File

@ -238,6 +238,9 @@ protected:
/// True if SHLD based rotate is fast.
bool HasFastSHLDRotate;
/// True if the processor supports macrofusion.
bool HasMacroFusion;
/// True if the processor has enhanced REP MOVSB/STOSB.
bool HasERMSB;
@ -488,6 +491,7 @@ public:
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
bool hasMacroFusion() const { return HasMacroFusion; }
bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }

View File

@ -16,8 +16,8 @@ define <8 x i32> @select00(i32 %a, <8 x i32> %b) nounwind {
;
; X64-LABEL: select00:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: cmpl $255, %edi
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: je .LBB0_2
; X64-NEXT: # BB#1:
; X64-NEXT: vmovaps %ymm0, %ymm1
@ -44,8 +44,8 @@ define <4 x i64> @select01(i32 %a, <4 x i64> %b) nounwind {
;
; X64-LABEL: select01:
; X64: # BB#0:
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: cmpl $255, %edi
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-NEXT: je .LBB1_2
; X64-NEXT: # BB#1:
; X64-NEXT: vmovaps %ymm0, %ymm1

View File

@ -60,8 +60,8 @@ define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK: # BB#0: # %for_exit499
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: # implicit-def: %YMM0
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: # implicit-def: %YMM0
; CHECK-NEXT: jne .LBB4_2
; CHECK-NEXT: # BB#1: # %load.i1247
; CHECK-NEXT: pushq %rbp

View File

@ -692,8 +692,8 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
;
; AVX512BW-LABEL: test8:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: cmpl %esi, %edi
; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT: jg LBB17_1
; AVX512BW-NEXT: ## BB#2:
; AVX512BW-NEXT: vpcmpltud %zmm2, %zmm1, %k0
@ -708,8 +708,8 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
;
; AVX512DQ-LABEL: test8:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: cmpl %esi, %edi
; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT: jg LBB17_1
; AVX512DQ-NEXT: ## BB#2:
; AVX512DQ-NEXT: vpcmpltud %zmm2, %zmm1, %k0

View File

@ -1678,8 +1678,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
; VEX-NEXT: .LBB39_6:
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: testq %rax, %rax
; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: js .LBB39_8
; VEX-NEXT: # BB#7:
; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
@ -1914,8 +1914,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
; VEX-NEXT: .LBB41_6:
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: testq %rax, %rax
; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VEX-NEXT: js .LBB41_8
; VEX-NEXT: # BB#7:
; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1

View File

@ -296,9 +296,9 @@ while.end: ; preds = %while.body, %entry
; CHECK-LABEL: Transform
; CHECK-NOT: cmov
; CHECK: divl [[a:%[0-9a-z]*]]
; CHECK: cmpl [[a]], %eax
; CHECK: movl $11, [[s1:%[0-9a-z]*]]
; CHECK: movl [[a]], [[s2:%[0-9a-z]*]]
; CHECK: cmpl [[a]], %edx
; CHECK: ja [[SinkBB:.*]]
; CHECK: [[FalseBB:.*]]:
; CHECK: movl $22, [[s1]]