mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[ARM] Improve if-conversion for M-class CPUs without branch predictors
The current heuristic in isProfitableToIfCvt assumes we have a branch predictor, and so gives the wrong answer in some cases when we don't. This patch adds a subtarget feature to indicate that a subtarget has no branch predictor, and changes the heuristic in isProfitableToIfCvt when it's present. This gives a slight overall improvement in a set of embedded benchmarks on Cortex-M4 and Cortex-M33. Differential Revision: https://reviews.llvm.org/D34398 llvm-svn: 306547
This commit is contained in:
parent
ed751c8384
commit
3ad0317ae0
@ -222,6 +222,13 @@ def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
|
||||
def FeatureHasRetAddrStack : SubtargetFeature<"ret-addr-stack", "HasRetAddrStack", "true",
|
||||
"Has return address stack">;
|
||||
|
||||
// Some processors have no branch predictor, which changes the expected cost of
|
||||
// taking a branch which affects the choice of whether to use predicated
|
||||
// instructions.
|
||||
def FeatureHasNoBranchPredictor : SubtargetFeature<"no-branch-predictor",
|
||||
"HasBranchPredictor", "false",
|
||||
"Has no branch predictor">;
|
||||
|
||||
/// DSP extension.
|
||||
def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
|
||||
"Supports DSP instructions in ARM and/or Thumb2">;
|
||||
@ -756,13 +763,19 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
|
||||
FeatureHasSlowFPVMLx,
|
||||
FeatureAvoidPartialCPSR]>;
|
||||
|
||||
def : ProcNoItin<"cortex-m3", [ARMv7m, ProcM3]>;
|
||||
def : ProcNoItin<"sc300", [ARMv7m, ProcM3]>;
|
||||
def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m,
|
||||
ProcM3,
|
||||
FeatureHasNoBranchPredictor]>;
|
||||
|
||||
def : ProcNoItin<"cortex-m4", [ARMv7em,
|
||||
def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m,
|
||||
ProcM3,
|
||||
FeatureHasNoBranchPredictor]>;
|
||||
|
||||
def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em,
|
||||
FeatureVFP4,
|
||||
FeatureVFPOnlySP,
|
||||
FeatureD16]>;
|
||||
FeatureD16,
|
||||
FeatureHasNoBranchPredictor]>;
|
||||
|
||||
def : ProcNoItin<"cortex-m7", [ARMv7em,
|
||||
FeatureFPARMv8,
|
||||
@ -771,11 +784,12 @@ def : ProcNoItin<"cortex-m7", [ARMv7em,
|
||||
def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
|
||||
FeatureNoMovt]>;
|
||||
|
||||
def : ProcNoItin<"cortex-m33", [ARMv8mMainline,
|
||||
def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline,
|
||||
FeatureDSP,
|
||||
FeatureFPARMv8,
|
||||
FeatureD16,
|
||||
FeatureVFPOnlySP]>;
|
||||
FeatureVFPOnlySP,
|
||||
FeatureHasNoBranchPredictor]>;
|
||||
|
||||
def : ProcNoItin<"cortex-a32", [ARMv8a,
|
||||
FeatureHWDivThumb,
|
||||
|
@ -1851,9 +1851,9 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
|
||||
}
|
||||
|
||||
bool ARMBaseInstrInfo::
|
||||
isProfitableToIfCvt(MachineBasicBlock &,
|
||||
isProfitableToIfCvt(MachineBasicBlock &TBB,
|
||||
unsigned TCycles, unsigned TExtra,
|
||||
MachineBasicBlock &,
|
||||
MachineBasicBlock &FBB,
|
||||
unsigned FCycles, unsigned FExtra,
|
||||
BranchProbability Probability) const {
|
||||
if (!TCycles)
|
||||
@ -1863,14 +1863,43 @@ isProfitableToIfCvt(MachineBasicBlock &,
|
||||
// Here we scale up each component of UnpredCost to avoid precision issues when
|
||||
// scaling TCycles/FCycles by Probability.
|
||||
const unsigned ScalingUpFactor = 1024;
|
||||
unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
|
||||
unsigned FUnpredCost =
|
||||
Probability.getCompl().scale(FCycles * ScalingUpFactor);
|
||||
unsigned UnpredCost = TUnpredCost + FUnpredCost;
|
||||
UnpredCost += 1 * ScalingUpFactor; // The branch itself
|
||||
UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
|
||||
|
||||
return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
|
||||
unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
|
||||
unsigned UnpredCost;
|
||||
if (!Subtarget.hasBranchPredictor()) {
|
||||
// When we don't have a branch predictor it's always cheaper to not take a
|
||||
// branch than take it, so we have to take that into account.
|
||||
unsigned NotTakenBranchCost = 1;
|
||||
unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
|
||||
unsigned TUnpredCycles, FUnpredCycles;
|
||||
if (!FCycles) {
|
||||
// Triangle: TBB is the fallthrough
|
||||
TUnpredCycles = TCycles + NotTakenBranchCost;
|
||||
FUnpredCycles = TakenBranchCost;
|
||||
} else {
|
||||
// Diamond: TBB is the block that is branched to, FBB is the fallthrough
|
||||
TUnpredCycles = TCycles + TakenBranchCost;
|
||||
FUnpredCycles = FCycles + NotTakenBranchCost;
|
||||
}
|
||||
// The total cost is the cost of each path scaled by their probabilities
|
||||
unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
|
||||
unsigned FUnpredCost = Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
|
||||
UnpredCost = TUnpredCost + FUnpredCost;
|
||||
// When predicating assume that the first IT can be folded away but later
|
||||
// ones cost one cycle each
|
||||
if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
|
||||
PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
|
||||
}
|
||||
} else {
|
||||
unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
|
||||
unsigned FUnpredCost =
|
||||
Probability.getCompl().scale(FCycles * ScalingUpFactor);
|
||||
UnpredCost = TUnpredCost + FUnpredCost;
|
||||
UnpredCost += 1 * ScalingUpFactor; // The branch itself
|
||||
UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
|
||||
}
|
||||
|
||||
return PredCost <= UnpredCost;
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -424,3 +424,4 @@ include "ARMScheduleA9.td"
|
||||
include "ARMScheduleSwift.td"
|
||||
include "ARMScheduleR52.td"
|
||||
include "ARMScheduleA57.td"
|
||||
include "ARMScheduleM3.td"
|
||||
|
21
lib/Target/ARM/ARMScheduleM3.td
Normal file
21
lib/Target/ARM/ARMScheduleM3.td
Normal file
@ -0,0 +1,21 @@
|
||||
//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the machine model for the ARM Cortex-M3 processor.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def CortexM3Model : SchedMachineModel {
|
||||
let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue
|
||||
let MicroOpBufferSize = 0; // In-order
|
||||
let LoadLatency = 2; // Latency when not pipelined, not pc-relative
|
||||
let MispredictPenalty = 2; // Best case branch taken cost
|
||||
|
||||
let CompleteModel = 0;
|
||||
}
|
@ -246,6 +246,11 @@ protected:
|
||||
/// avoid issuing "normal" call instructions to callees which do not return.
|
||||
bool HasRetAddrStack = false;
|
||||
|
||||
/// HasBranchPredictor - True if the subtarget has a branch predictor. Having
|
||||
/// a branch predictor or not changes the expected cost of taking a branch
|
||||
/// which affects the choice of whether to use predicated instructions.
|
||||
bool HasBranchPredictor = true;
|
||||
|
||||
/// HasMPExtension - True if the subtarget supports Multiprocessing
|
||||
/// extension (ARMv7 only).
|
||||
bool HasMPExtension = false;
|
||||
@ -554,6 +559,7 @@ public:
|
||||
bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; }
|
||||
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
|
||||
bool hasRetAddrStack() const { return HasRetAddrStack; }
|
||||
bool hasBranchPredictor() const { return HasBranchPredictor; }
|
||||
bool hasMPExtension() const { return HasMPExtension; }
|
||||
bool hasDSP() const { return HasDSP; }
|
||||
bool useNaClTrap() const { return UseNaClTrap; }
|
||||
|
154
test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll
Normal file
154
test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll
Normal file
@ -0,0 +1,154 @@
|
||||
; RUN: llc < %s -mtriple=thumbv7m -mcpu=cortex-m7 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BP
|
||||
; RUN: llc < %s -mtriple=thumbv7m -mcpu=cortex-m3 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOBP
|
||||
|
||||
declare void @otherfn()
|
||||
|
||||
; CHECK-LABEL: triangle1:
|
||||
; CHECK: itt ne
|
||||
; CHECK: movne
|
||||
; CHECK: strne
|
||||
define i32 @triangle1(i32 %n, i32* %p) {
|
||||
entry:
|
||||
%tobool = icmp eq i32 %n, 0
|
||||
br i1 %tobool, label %if.end, label %if.then
|
||||
|
||||
if.then:
|
||||
store i32 1, i32* %p, align 4
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
tail call void @otherfn()
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: triangle2:
|
||||
; CHECK-BP: itttt ne
|
||||
; CHECK-BP: movne
|
||||
; CHECK-BP: strne
|
||||
; CHECK-BP: movne
|
||||
; CHECK-BP: strne
|
||||
; CHECK-NOBP: cbz
|
||||
; CHECK-NOBP: movs
|
||||
; CHECK-NOBP: str
|
||||
; CHECK-NOBP: movs
|
||||
; CHECK-NOBP: str
|
||||
define i32 @triangle2(i32 %n, i32* %p, i32* %q) {
|
||||
entry:
|
||||
%tobool = icmp eq i32 %n, 0
|
||||
br i1 %tobool, label %if.end, label %if.then
|
||||
|
||||
if.then:
|
||||
store i32 1, i32* %p, align 4
|
||||
store i32 2, i32* %q, align 4
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
tail call void @otherfn()
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: triangle3:
|
||||
; CHECK: cbz
|
||||
; CHECK: movs
|
||||
; CHECK: str
|
||||
; CHECK: movs
|
||||
; CHECK: str
|
||||
; CHECK: movs
|
||||
; CHECK: str
|
||||
define i32 @triangle3(i32 %n, i32* %p, i32* %q, i32* %r) {
|
||||
entry:
|
||||
%tobool = icmp eq i32 %n, 0
|
||||
br i1 %tobool, label %if.end, label %if.then
|
||||
|
||||
if.then:
|
||||
store i32 1, i32* %p, align 4
|
||||
store i32 2, i32* %q, align 4
|
||||
store i32 3, i32* %r, align 4
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
tail call void @otherfn()
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: diamond1:
|
||||
; CHECK: ite eq
|
||||
; CHECK: ldreq
|
||||
; CHECK: strne
|
||||
define i32 @diamond1(i32 %n, i32* %p) {
|
||||
entry:
|
||||
%tobool = icmp eq i32 %n, 0
|
||||
br i1 %tobool, label %if.else, label %if.then
|
||||
|
||||
if.then:
|
||||
store i32 %n, i32* %p, align 4
|
||||
br label %if.end
|
||||
|
||||
if.else:
|
||||
%0 = load i32, i32* %p, align 4
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
%n.addr.0 = phi i32 [ %n, %if.then ], [ %0, %if.else ]
|
||||
tail call void @otherfn()
|
||||
ret i32 %n.addr.0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: diamond2:
|
||||
; CHECK-BP: itte
|
||||
; CHECK-BP: streq
|
||||
; CHECK-BP: ldreq
|
||||
; CHECK-BP: strne
|
||||
; CHECK-NOBP: cbz
|
||||
; CHECK-NOBP: str
|
||||
; CHECK-NOBP: b
|
||||
; CHECK-NOBP: str
|
||||
; CHECK-NOBP: ldr
|
||||
define i32 @diamond2(i32 %n, i32 %m, i32* %p, i32* %q) {
|
||||
entry:
|
||||
%tobool = icmp eq i32 %n, 0
|
||||
br i1 %tobool, label %if.else, label %if.then
|
||||
|
||||
if.then:
|
||||
store i32 %n, i32* %p, align 4
|
||||
br label %if.end
|
||||
|
||||
if.else:
|
||||
store i32 %m, i32* %q, align 4
|
||||
%0 = load i32, i32* %p, align 4
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
%n.addr.0 = phi i32 [ %n, %if.then ], [ %0, %if.else ]
|
||||
tail call void @otherfn()
|
||||
ret i32 %n.addr.0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: diamond3:
|
||||
; CHECK: cbz
|
||||
; CHECK: movs
|
||||
; CHECK: str
|
||||
; CHECK: b
|
||||
; CHECK: ldr
|
||||
; CHECK: ldr
|
||||
; CHECK: adds
|
||||
define i32 @diamond3(i32 %n, i32* %p, i32* %q) {
|
||||
entry:
|
||||
%tobool = icmp eq i32 %n, 0
|
||||
br i1 %tobool, label %if.else, label %if.then
|
||||
|
||||
if.then:
|
||||
store i32 1, i32* %p, align 4
|
||||
br label %if.end
|
||||
|
||||
if.else:
|
||||
%0 = load i32, i32* %p, align 4
|
||||
%1 = load i32, i32* %q, align 4
|
||||
%add = add nsw i32 %1, %0
|
||||
br label %if.end
|
||||
|
||||
if.end:
|
||||
%n.addr.0 = phi i32 [ %n, %if.then ], [ %add, %if.else ]
|
||||
tail call void @otherfn()
|
||||
ret i32 %n.addr.0
|
||||
}
|
Loading…
Reference in New Issue
Block a user