1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[SimplifyCFG] Cost required selects

Before we speculatively execute a basic block, compare the cost of
inserting the necessary select instructions against the phi folding
threshold. For non-trivial insertions, a more accurate decision can
probably be made during machine if-conversion. With minsize we query
the CodeSize cost; otherwise we use SizeAndLatency.

Differential Revision: https://reviews.llvm.org/D82438
This commit is contained in:
Sam Parker 2020-08-21 09:27:21 +01:00
parent ac79b2437e
commit 76932d3b0f
2 changed files with 25 additions and 5 deletions

View File

@ -2042,6 +2042,25 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
BasicBlock *BB = BI->getParent();
BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
TargetTransformInfo::TargetCostKind CostKind =
BI->getFunction()->hasMinSize() ?
TargetTransformInfo::TCK_CodeSize :
TargetTransformInfo::TCK_SizeAndLatency;
// Check how expensive it will be to insert the necessary selects.
unsigned CostOfSelects = 0;
for (PHINode &PN : EndBB->phis()) {
unsigned OrigI = PN.getBasicBlockIndex(BB);
unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
Value *OrigV = PN.getIncomingValue(OrigI);
Value *ThenV = PN.getIncomingValue(ThenI);
if (OrigV != ThenV)
CostOfSelects +=
TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
CostKind);
}
if (CostOfSelects > PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
return false;
// If ThenBB is actually on the false edge of the conditional branch, remember
// to swap the select operands later.
bool Invert = false;

View File

@ -6,6 +6,7 @@
; Test that the phis from for.inc.preheader aren't hoisted into sw.bb92 because
; the cost is too great - we can make a better decision later on.
; FIXME: The Arm cost model needs to be updated for SizeAndLatency.
define i32 @multiple_spec_select_costs(i8* %a, i32* %idx, i8 %in) {
; V8M-LABEL: @multiple_spec_select_costs(
; V8M-NEXT: entry:
@ -113,13 +114,13 @@ define i32 @multiple_spec_select_costs_minsize(i8* %a, i32* %idx, i8 %in) #0 {
; V8M: sw.bb92:
; V8M-NEXT: [[C_OFF_I150:%.*]] = add i8 [[IN]], -48
; V8M-NEXT: [[UGT_9:%.*]] = icmp ugt i8 [[C_OFF_I150]], 9
; V8M-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[UGT_9]], i1 false, i1 true
; V8M-NEXT: [[SPEC_SELECT1:%.*]] = select i1 [[UGT_9]], i32 1, i32 7
; V8M-NEXT: br i1 [[UGT_9]], label [[FOR_INC_PREHEADER]], label [[SELECT_UNFOLD198:%.*]]
; V8M: select.unfold198:
; V8M-NEXT: br label [[FOR_INC_PREHEADER]]
; V8M: for.inc.preheader:
; V8M-NEXT: [[STR_PH_0:%.*]] = phi i8* [ [[GEP_A_2]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR109_C4]], [[SW_BB92]] ]
; V8M-NEXT: [[CMP:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[SPEC_SELECT]], [[SW_BB92]] ]
; V8M-NEXT: [[PHI_RES:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[SPEC_SELECT1]], [[SW_BB92]] ]
; V8M-NEXT: [[STR_PH_0:%.*]] = phi i8* [ [[INCDEC_PTR109_C4]], [[SELECT_UNFOLD198]] ], [ [[INCDEC_PTR109_C4]], [[SW_BB92]] ], [ [[GEP_A_2]], [[ENTRY:%.*]] ]
; V8M-NEXT: [[CMP:%.*]] = phi i1 [ true, [[SELECT_UNFOLD198]] ], [ false, [[SW_BB92]] ], [ false, [[ENTRY]] ]
; V8M-NEXT: [[PHI_RES:%.*]] = phi i32 [ 7, [[SELECT_UNFOLD198]] ], [ 1, [[SW_BB92]] ], [ 1, [[ENTRY]] ]
; V8M-NEXT: br label [[FOR_INC:%.*]]
; V8M: for.inc:
; V8M-NEXT: [[STR_PH_1:%.*]] = phi i8* [ [[INCDEC_PTR109:%.*]], [[FOR_BODY:%.*]] ], [ [[STR_PH_0]], [[FOR_INC_PREHEADER]] ]