2014-05-24 14:50:23 +02:00
|
|
|
//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
|
2014-03-29 11:18:08 +01:00
|
|
|
//
|
2019-01-19 09:50:56 +01:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2014-03-29 11:18:08 +01:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
2014-05-24 14:50:23 +02:00
|
|
|
// This file implements the AArch64ConditionalCompares pass which reduces
|
2014-03-29 11:18:08 +01:00
|
|
|
// branching and code size by using the conditional compare instructions CCMP,
|
|
|
|
// CCMN, and FCCMP.
|
|
|
|
//
|
|
|
|
// The CFG transformations for forming conditional compares are very similar to
|
|
|
|
// if-conversion, and this pass should run immediately before the early
|
|
|
|
// if-conversion pass.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2014-05-24 14:50:23 +02:00
|
|
|
#include "AArch64.h"
|
2014-03-29 11:18:08 +01:00
|
|
|
#include "llvm/ADT/DepthFirstIterator.h"
|
|
|
|
#include "llvm/ADT/SetVector.h"
|
|
|
|
#include "llvm/ADT/SmallPtrSet.h"
|
|
|
|
#include "llvm/ADT/Statistic.h"
|
2017-06-27 17:00:22 +02:00
|
|
|
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
2014-03-29 11:18:08 +01:00
|
|
|
#include "llvm/CodeGen/MachineDominators.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
|
|
#include "llvm/CodeGen/MachineLoopInfo.h"
|
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
|
|
#include "llvm/CodeGen/MachineTraceMetrics.h"
|
|
|
|
#include "llvm/CodeGen/Passes.h"
|
2017-11-08 02:01:31 +01:00
|
|
|
#include "llvm/CodeGen/TargetInstrInfo.h"
|
2017-11-17 02:07:10 +01:00
|
|
|
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
|
|
|
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
Sink all InitializePasses.h includes
This file lists every pass in LLVM, and is included by Pass.h, which is
very popular. Every time we add, remove, or rename a pass in LLVM, it
caused lots of recompilation.
I found this fact by looking at this table, which is sorted by the
number of times a file was changed over the last 100,000 git commits
multiplied by the number of object files that depend on it in the
current checkout:
recompiles touches affected_files header
342380 95 3604 llvm/include/llvm/ADT/STLExtras.h
314730 234 1345 llvm/include/llvm/InitializePasses.h
307036 118 2602 llvm/include/llvm/ADT/APInt.h
213049 59 3611 llvm/include/llvm/Support/MathExtras.h
170422 47 3626 llvm/include/llvm/Support/Compiler.h
162225 45 3605 llvm/include/llvm/ADT/Optional.h
158319 63 2513 llvm/include/llvm/ADT/Triple.h
140322 39 3598 llvm/include/llvm/ADT/StringRef.h
137647 59 2333 llvm/include/llvm/Support/Error.h
131619 73 1803 llvm/include/llvm/Support/FileSystem.h
Before this change, touching InitializePasses.h would cause 1345 files
to recompile. After this change, touching it only causes 550 compiles in
an incremental rebuild.
Reviewers: bkramer, asbirlea, bollu, jdoerfert
Differential Revision: https://reviews.llvm.org/D70211
2019-11-13 22:15:01 +01:00
|
|
|
#include "llvm/InitializePasses.h"
|
2014-03-29 11:18:08 +01:00
|
|
|
#include "llvm/Support/CommandLine.h"
|
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
2014-05-24 14:50:23 +02:00
|
|
|
#define DEBUG_TYPE "aarch64-ccmp"
|
2014-04-22 04:41:26 +02:00
|
|
|
|
2014-03-29 11:18:08 +01:00
|
|
|
// Absolute maximum number of instructions allowed per speculated block.
|
|
|
|
// This bypasses all other heuristics, so it should be set fairly high.
|
|
|
|
static cl::opt<unsigned> BlockInstrLimit(
|
2014-05-24 14:50:23 +02:00
|
|
|
"aarch64-ccmp-limit", cl::init(30), cl::Hidden,
|
2014-03-29 11:18:08 +01:00
|
|
|
cl::desc("Maximum number of instructions per speculated block."));
|
|
|
|
|
|
|
|
// Stress testing mode - disable heuristics.
|
2014-05-24 14:50:23 +02:00
|
|
|
static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
|
2014-03-29 11:18:08 +01:00
|
|
|
cl::desc("Turn all knobs to 11"));
|
|
|
|
|
|
|
|
// Candidates examined.
STATISTIC(NumConsidered, "Number of ccmps considered");

// Reasons a candidate was rejected.
STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");

STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");

// Successful conversions.
STATISTIC(NumConverted, "Number of ccmp instructions created");
STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// SSACCmpConv
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// The SSACCmpConv class performs ccmp-conversion on SSA form machine code
|
|
|
|
// after determining if it is possible. The class contains no heuristics;
|
|
|
|
// external code should be used to determine when ccmp-conversion is a good
|
|
|
|
// idea.
|
|
|
|
//
|
|
|
|
// CCmp-formation works on a CFG representing chained conditions, typically
|
|
|
|
// from C's short-circuit || and && operators:
|
|
|
|
//
|
|
|
|
//   From:         Head            To:         Head
//                 / |                         CmpBB
//                /  |                         / |
//               |  CmpBB                     /  |
//               |  / |                    Tail  |
//               |  /  |                     |   |
//              Tail   |                     |   |
//                |    |                     |   |
//               ...  ...                   ...  ...
|
|
|
|
//
|
|
|
|
// The Head block is terminated by a br.cond instruction, and the CmpBB block
|
|
|
|
// contains compare + br.cond. Tail must be a successor of both.
|
|
|
|
//
|
|
|
|
// The cmp-conversion turns the compare instruction in CmpBB into a conditional
|
|
|
|
// compare, and merges CmpBB into Head, speculatively executing its
|
2014-05-24 14:50:23 +02:00
|
|
|
// instructions. The AArch64 conditional compare instructions have an immediate
|
2014-03-29 11:18:08 +01:00
|
|
|
// operand that specifies the NZCV flag values when the condition is false and
|
|
|
|
// the compare isn't executed. This makes it possible to chain compares with
|
|
|
|
// different condition codes.
|
|
|
|
//
|
|
|
|
// Example:
|
|
|
|
//
|
|
|
|
// if (a == 5 || b == 17)
|
|
|
|
// foo();
|
|
|
|
//
|
|
|
|
// Head:
|
|
|
|
// cmp w0, #5
|
|
|
|
// b.eq Tail
|
|
|
|
// CmpBB:
|
|
|
|
// cmp w1, #17
|
|
|
|
// b.eq Tail
|
|
|
|
// ...
|
|
|
|
// Tail:
|
|
|
|
// bl _foo
|
|
|
|
//
|
|
|
|
// Becomes:
|
|
|
|
//
|
|
|
|
// Head:
|
|
|
|
// cmp w0, #5
|
|
|
|
// ccmp w1, #17, 4, ne ; 4 = nZcv
|
|
|
|
// b.eq Tail
|
|
|
|
// ...
|
|
|
|
// Tail:
|
|
|
|
// bl _foo
|
|
|
|
//
|
|
|
|
// The ccmp condition code is the one that would cause the Head terminator to
|
|
|
|
// branch to CmpBB.
|
|
|
|
//
|
|
|
|
// FIXME: It should also be possible to speculate a block on the critical edge
|
|
|
|
// between Head and Tail, just like if-converting a diamond.
|
|
|
|
//
|
|
|
|
// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
class SSACCmpConv {
|
|
|
|
MachineFunction *MF;
|
|
|
|
const TargetInstrInfo *TII;
|
|
|
|
const TargetRegisterInfo *TRI;
|
|
|
|
MachineRegisterInfo *MRI;
|
2017-06-27 17:00:22 +02:00
|
|
|
const MachineBranchProbabilityInfo *MBPI;
|
2014-03-29 11:18:08 +01:00
|
|
|
|
|
|
|
public:
|
|
|
|
/// The first block containing a conditional branch, dominating everything
|
|
|
|
/// else.
|
|
|
|
MachineBasicBlock *Head;
|
|
|
|
|
2014-04-09 16:47:27 +02:00
|
|
|
/// The block containing cmp+br.cond with a successor shared with Head.
|
2014-03-29 11:18:08 +01:00
|
|
|
MachineBasicBlock *CmpBB;
|
|
|
|
|
|
|
|
/// The common successor for Head and CmpBB.
|
|
|
|
MachineBasicBlock *Tail;
|
|
|
|
|
|
|
|
/// The compare instruction in CmpBB that can be converted to a ccmp.
|
|
|
|
MachineInstr *CmpMI;
|
|
|
|
|
|
|
|
private:
|
2020-01-21 16:47:35 +01:00
|
|
|
/// The branch condition in Head as determined by analyzeBranch.
|
2014-03-29 11:18:08 +01:00
|
|
|
SmallVector<MachineOperand, 4> HeadCond;
|
|
|
|
|
|
|
|
/// The condition code that makes Head branch to CmpBB.
|
2014-05-24 14:50:23 +02:00
|
|
|
AArch64CC::CondCode HeadCmpBBCC;
|
2014-03-29 11:18:08 +01:00
|
|
|
|
|
|
|
/// The branch condition in CmpBB.
|
|
|
|
SmallVector<MachineOperand, 4> CmpBBCond;
|
|
|
|
|
|
|
|
/// The condition code that makes CmpBB branch to Tail.
|
2014-05-24 14:50:23 +02:00
|
|
|
AArch64CC::CondCode CmpBBTailCC;
|
2014-03-29 11:18:08 +01:00
|
|
|
|
|
|
|
/// Check if the Tail PHIs are trivially convertible.
|
|
|
|
bool trivialTailPHIs();
|
|
|
|
|
|
|
|
/// Remove CmpBB from the Tail PHIs.
|
|
|
|
void updateTailPHIs();
|
|
|
|
|
|
|
|
/// Check if an operand defining DstReg is dead.
|
|
|
|
bool isDeadDef(unsigned DstReg);
|
|
|
|
|
|
|
|
/// Find the compare instruction in MBB that controls the conditional branch.
|
|
|
|
/// Return NULL if a convertible instruction can't be found.
|
|
|
|
MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
|
|
|
|
|
|
|
|
/// Return true if all non-terminator instructions in MBB can be safely
|
|
|
|
/// speculated.
|
|
|
|
bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
|
|
|
|
|
|
|
|
public:
|
|
|
|
/// runOnMachineFunction - Initialize per-function data structures.
|
2017-06-27 17:00:22 +02:00
|
|
|
void runOnMachineFunction(MachineFunction &MF,
|
|
|
|
const MachineBranchProbabilityInfo *MBPI) {
|
2014-03-29 11:18:08 +01:00
|
|
|
this->MF = &MF;
|
2017-06-27 17:00:22 +02:00
|
|
|
this->MBPI = MBPI;
|
2014-08-05 04:39:49 +02:00
|
|
|
TII = MF.getSubtarget().getInstrInfo();
|
|
|
|
TRI = MF.getSubtarget().getRegisterInfo();
|
2014-03-29 11:18:08 +01:00
|
|
|
MRI = &MF.getRegInfo();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// If the sub-CFG headed by MBB can be cmp-converted, initialize the
|
|
|
|
/// internal state, and return true.
|
|
|
|
bool canConvert(MachineBasicBlock *MBB);
|
|
|
|
|
|
|
|
/// Cmo-convert the last block passed to canConvertCmp(), assuming
|
|
|
|
/// it is possible. Add any erased blocks to RemovedBlocks.
|
|
|
|
void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
|
|
|
|
|
|
|
|
/// Return the expected code size delta if the conversion into a
|
|
|
|
/// conditional compare is performed.
|
|
|
|
int expectedCodeSizeDelta() const;
|
|
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
|
|
|
|
// Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
|
|
|
|
// This means that no if-conversion is required when merging CmpBB into Head.
|
|
|
|
bool SSACCmpConv::trivialTailPHIs() {
|
2014-04-10 23:49:24 +02:00
|
|
|
for (auto &I : *Tail) {
|
|
|
|
if (!I.isPHI())
|
|
|
|
break;
|
2014-03-29 11:18:08 +01:00
|
|
|
unsigned HeadReg = 0, CmpBBReg = 0;
|
|
|
|
// PHI operands come in (VReg, MBB) pairs.
|
2014-04-10 23:49:24 +02:00
|
|
|
for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
|
|
|
|
MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
|
[aarch64] Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Manual fixups in:
AArch64InstrInfo.cpp - genFusedMultiply() now takes a Register* instead of unsigned*
AArch64LoadStoreOptimizer.cpp - Ternary operator was ambiguous between Register/MCRegister. Settled on Register
Depends on D65919
Reviewers: aemerson
Subscribers: jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision for full review was: https://reviews.llvm.org/D65962
llvm-svn: 368628
2019-08-13 00:40:53 +02:00
|
|
|
Register Reg = I.getOperand(oi).getReg();
|
2014-03-29 11:18:08 +01:00
|
|
|
if (MBB == Head) {
|
|
|
|
assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
|
|
|
|
HeadReg = Reg;
|
|
|
|
}
|
|
|
|
if (MBB == CmpBB) {
|
|
|
|
assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
|
|
|
|
CmpBBReg = Reg;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (HeadReg != CmpBBReg)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
|
|
|
|
// removing the CmpBB operands. The Head operands will be identical.
|
|
|
|
void SSACCmpConv::updateTailPHIs() {
|
2014-04-10 23:49:24 +02:00
|
|
|
for (auto &I : *Tail) {
|
|
|
|
if (!I.isPHI())
|
|
|
|
break;
|
2014-03-29 11:18:08 +01:00
|
|
|
// I is a PHI. It can have multiple entries for CmpBB.
|
2014-04-10 23:49:24 +02:00
|
|
|
for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
|
2014-03-29 11:18:08 +01:00
|
|
|
// PHI operands are (Reg, MBB) at (oi-2, oi-1).
|
2014-04-10 23:49:24 +02:00
|
|
|
if (I.getOperand(oi - 1).getMBB() == CmpBB) {
|
|
|
|
I.RemoveOperand(oi - 1);
|
|
|
|
I.RemoveOperand(oi - 2);
|
2014-03-29 11:18:08 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-24 14:50:23 +02:00
|
|
|
// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
|
|
|
|
// are still writing virtual registers without any uses.
|
2014-03-29 11:18:08 +01:00
|
|
|
bool SSACCmpConv::isDeadDef(unsigned DstReg) {
|
|
|
|
// Writes to the zero register are dead.
|
2014-05-24 14:50:23 +02:00
|
|
|
if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
|
2014-03-29 11:18:08 +01:00
|
|
|
return true;
|
2019-08-02 01:27:28 +02:00
|
|
|
if (!Register::isVirtualRegister(DstReg))
|
2014-03-29 11:18:08 +01:00
|
|
|
return false;
|
|
|
|
// A virtual register def without any uses will be marked dead later, and
|
|
|
|
// eventually replaced by the zero register.
|
|
|
|
return MRI->use_nodbg_empty(DstReg);
|
|
|
|
}
|
|
|
|
|
2020-01-21 16:47:35 +01:00
|
|
|
// Parse a condition code returned by analyzeBranch, and compute the CondCode
|
2014-03-29 11:18:08 +01:00
|
|
|
// corresponding to TBB.
|
|
|
|
// Return
|
2014-05-24 14:50:23 +02:00
|
|
|
static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
|
2014-03-29 11:18:08 +01:00
|
|
|
// A normal br.cond simply has the condition code.
|
|
|
|
if (Cond[0].getImm() != -1) {
|
|
|
|
assert(Cond.size() == 1 && "Unknown Cond array format");
|
2014-05-24 14:50:23 +02:00
|
|
|
CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
|
2014-03-29 11:18:08 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
// For tbz and cbz instruction, the opcode is next.
|
|
|
|
switch (Cond[1].getImm()) {
|
|
|
|
default:
|
|
|
|
// This includes tbz / tbnz branches which can't be converted to
|
|
|
|
// ccmp + br.cond.
|
|
|
|
return false;
|
2014-05-24 14:50:23 +02:00
|
|
|
case AArch64::CBZW:
|
|
|
|
case AArch64::CBZX:
|
2014-03-29 11:18:08 +01:00
|
|
|
assert(Cond.size() == 3 && "Unknown Cond array format");
|
2014-05-24 14:50:23 +02:00
|
|
|
CC = AArch64CC::EQ;
|
2014-03-29 11:18:08 +01:00
|
|
|
return true;
|
2014-05-24 14:50:23 +02:00
|
|
|
case AArch64::CBNZW:
|
|
|
|
case AArch64::CBNZX:
|
2014-03-29 11:18:08 +01:00
|
|
|
assert(Cond.size() == 3 && "Unknown Cond array format");
|
2014-05-24 14:50:23 +02:00
|
|
|
CC = AArch64CC::NE;
|
2014-03-29 11:18:08 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Locate the instruction in MBB whose result decides MBB's conditional
/// branch and that can be rewritten as a conditional compare.
///
/// Returns the terminator itself for cbz / cbnz, the flag-setting compare for
/// flag-based branches, or nullptr when nothing convertible is found.
MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
  MachineBasicBlock::iterator Cur = MBB->getFirstTerminator();
  if (Cur == MBB->end())
    return nullptr;

  // The terminator must be controlled by the flags, unless it is one of the
  // compare-and-branch forms.
  if (!Cur->readsRegister(AArch64::NZCV)) {
    const unsigned TermOpc = Cur->getOpcode();
    if (TermOpc == AArch64::CBZW || TermOpc == AArch64::CBZX ||
        TermOpc == AArch64::CBNZW || TermOpc == AArch64::CBNZX) {
      // These can be converted into a ccmp against #0.
      return &*Cur;
    }
    ++NumCmpTermRejs;
    LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *Cur);
    return nullptr;
  }

  // Scan backwards from the terminator for the instruction that sets the
  // flags it consumes.
  const MachineBasicBlock::iterator Begin = MBB->begin();
  while (Cur != Begin) {
    Cur = prev_nodbg(Cur, MBB->begin());
    assert(!Cur->isTerminator() && "Spurious terminator");

    switch (Cur->getOpcode()) {
    // cmp is an alias for subs with a dead destination register.
    case AArch64::SUBSWri:
    case AArch64::SUBSXri:
    // cmn is an alias for adds with a dead destination register.
    case AArch64::ADDSWri:
    case AArch64::ADDSXri:
      // Rd = SUBSri Rn, imm, shift
      // ccmp wants an unshifted uimm5, so reject anything else.
      if (Cur->getOperand(3).getImm() != 0 ||
          !isUInt<5>(Cur->getOperand(2).getImm())) {
        ++NumImmRangeRejs;
        LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *Cur);
        return nullptr;
      }
      LLVM_FALLTHROUGH;
    case AArch64::SUBSWrr:
    case AArch64::SUBSXrr:
    case AArch64::ADDSWrr:
    case AArch64::ADDSXrr:
      // Only a pure compare (result register unused) can become a ccmp.
      if (isDeadDef(Cur->getOperand(0).getReg()))
        return &*Cur;
      ++NumLiveDstRejs;
      LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
                        << *Cur);
      return nullptr;
    case AArch64::FCMPSrr:
    case AArch64::FCMPDrr:
    case AArch64::FCMPESrr:
    case AArch64::FCMPEDrr:
      return &*Cur;
    }

    // Not a recognized compare: check for flag reads and clobbers.
    PhysRegInfo FlagInfo = AnalyzePhysRegInBundle(*Cur, AArch64::NZCV, TRI);

    if (FlagInfo.Read) {
      // The ccmp doesn't produce exactly the same flags as the original
      // compare, so reject the transform if there are uses of the flags
      // besides the terminators.
      ++NumMultNZCVUses;
      LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *Cur);
      return nullptr;
    }

    if (FlagInfo.Defined || FlagInfo.Clobbered) {
      ++NumUnknNZCVDefs;
      LLVM_DEBUG(dbgs() << "Not convertible compare: " << *Cur);
      return nullptr;
    }
  }

  // Reached the top of the block without finding a flag definition.
  LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
                    << '\n');
  return nullptr;
}
|
|
|
|
|
|
|
|
/// Determine if all the instructions in MBB can safely
|
|
|
|
/// be speculated. The terminators are not considered.
|
|
|
|
///
|
|
|
|
/// Only CmpMI is allowed to clobber the flags.
|
|
|
|
///
|
|
|
|
bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
|
|
|
|
const MachineInstr *CmpMI) {
|
2014-04-30 15:14:14 +02:00
|
|
|
// Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
|
2014-03-29 11:18:08 +01:00
|
|
|
// get right.
|
|
|
|
if (!MBB->livein_empty()) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
unsigned InstrCount = 0;
|
|
|
|
|
|
|
|
// Check all instructions, except the terminators. It is assumed that
|
|
|
|
// terminators never have side effects or define any used register values.
|
2014-04-10 23:49:24 +02:00
|
|
|
for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
|
2018-05-09 04:42:00 +02:00
|
|
|
if (I.isDebugInstr())
|
2014-03-29 11:18:08 +01:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (++InstrCount > BlockInstrLimit && !Stress) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
|
|
|
|
<< BlockInstrLimit << " instructions.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// There shouldn't normally be any phis in a single-predecessor block.
|
2014-04-10 23:49:24 +02:00
|
|
|
if (I.isPHI()) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Can't hoist: " << I);
|
2014-03-29 11:18:08 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Don't speculate loads. Note that it may be possible and desirable to
|
|
|
|
// speculate GOT or constant pool loads that are guaranteed not to trap,
|
|
|
|
// but we don't support that for now.
|
2014-04-10 23:49:24 +02:00
|
|
|
if (I.mayLoad()) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Won't speculate load: " << I);
|
2014-03-29 11:18:08 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// We never speculate stores, so an AA pointer isn't necessary.
|
|
|
|
bool DontMoveAcrossStore = true;
|
2015-05-19 23:22:20 +02:00
|
|
|
if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Can't speculate: " << I);
|
2014-03-29 11:18:08 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-04-09 16:47:27 +02:00
|
|
|
// Only CmpMI is allowed to clobber the flags.
|
2014-05-24 14:50:23 +02:00
|
|
|
if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Clobbers flags: " << I);
|
2014-03-29 11:18:08 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
|
|
|
|
/// candidate for cmp-conversion. Fill out the internal state.
|
|
|
|
///
|
|
|
|
bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
|
|
|
|
Head = MBB;
|
2014-04-25 07:30:21 +02:00
|
|
|
Tail = CmpBB = nullptr;
|
2014-03-29 11:18:08 +01:00
|
|
|
|
|
|
|
if (Head->succ_size() != 2)
|
|
|
|
return false;
|
|
|
|
MachineBasicBlock *Succ0 = Head->succ_begin()[0];
|
|
|
|
MachineBasicBlock *Succ1 = Head->succ_begin()[1];
|
|
|
|
|
|
|
|
// CmpBB can only have a single predecessor. Tail is allowed many.
|
|
|
|
if (Succ0->pred_size() != 1)
|
|
|
|
std::swap(Succ0, Succ1);
|
|
|
|
|
|
|
|
// Succ0 is our candidate for CmpBB.
|
|
|
|
if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
CmpBB = Succ0;
|
|
|
|
Tail = Succ1;
|
|
|
|
|
|
|
|
if (!CmpBB->isSuccessor(Tail))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// The CFG topology checks out.
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
|
|
|
|
<< printMBBReference(*CmpBB) << " -> "
|
|
|
|
<< printMBBReference(*Tail) << '\n');
|
2014-03-29 11:18:08 +01:00
|
|
|
++NumConsidered;
|
|
|
|
|
|
|
|
// Tail is allowed to have many predecessors, but we can't handle PHIs yet.
|
|
|
|
//
|
|
|
|
// FIXME: Real PHIs could be if-converted as long as the CmpBB values are
|
|
|
|
// defined before The CmpBB cmp clobbers the flags. Alternatively, it should
|
|
|
|
// always be safe to sink the ccmp down to immediately before the CmpBB
|
|
|
|
// terminators.
|
|
|
|
if (!trivialTailPHIs()) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
++NumPhiRejs;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!Tail->livein_empty()) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
++NumPhysRejs;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// CmpBB should never have PHIs since Head is its only predecessor.
|
|
|
|
// FIXME: Clean them up if it happens.
|
|
|
|
if (!CmpBB->empty() && CmpBB->front().isPHI()) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
++NumPhi2Rejs;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!CmpBB->livein_empty()) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
++NumPhysRejs;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The branch we're looking to eliminate must be analyzable.
|
|
|
|
HeadCond.clear();
|
2014-04-25 07:30:21 +02:00
|
|
|
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
|
2016-07-15 16:41:04 +02:00
|
|
|
if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
++NumHeadBranchRejs;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// This is weird, probably some sort of degenerate CFG, or an edge to a
|
|
|
|
// landing pad.
|
|
|
|
if (!TBB || HeadCond.empty()) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(
|
2020-01-21 16:47:35 +01:00
|
|
|
dbgs() << "analyzeBranch didn't find conditional branch in Head.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
++NumHeadBranchRejs;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!parseCond(HeadCond, HeadCmpBBCC)) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
++NumHeadBranchRejs;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Make sure the branch direction is right.
|
|
|
|
if (TBB != CmpBB) {
|
|
|
|
assert(TBB == Tail && "Unexpected TBB");
|
2014-05-24 14:50:23 +02:00
|
|
|
HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
|
2014-03-29 11:18:08 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
CmpBBCond.clear();
|
2014-04-25 07:30:21 +02:00
|
|
|
TBB = FBB = nullptr;
|
2016-07-15 16:41:04 +02:00
|
|
|
if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
++NumCmpBranchRejs;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!TBB || CmpBBCond.empty()) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(
|
2020-01-21 16:47:35 +01:00
|
|
|
dbgs() << "analyzeBranch didn't find conditional branch in CmpBB.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
++NumCmpBranchRejs;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!parseCond(CmpBBCond, CmpBBTailCC)) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
++NumCmpBranchRejs;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (TBB != Tail)
|
2014-05-24 14:50:23 +02:00
|
|
|
CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
|
2014-03-29 11:18:08 +01:00
|
|
|
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Head->CmpBB on "
|
|
|
|
<< AArch64CC::getCondCodeName(HeadCmpBBCC)
|
|
|
|
<< ", CmpBB->Tail on "
|
|
|
|
<< AArch64CC::getCondCodeName(CmpBBTailCC) << '\n');
|
2014-03-29 11:18:08 +01:00
|
|
|
|
|
|
|
CmpMI = findConvertibleCompare(CmpBB);
|
|
|
|
if (!CmpMI)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!canSpeculateInstrs(CmpBB, CmpMI)) {
|
|
|
|
++NumSpeculateRejs;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
|
|
|
|
<< printMBBReference(*Head) << ":\n"
|
|
|
|
<< *CmpBB);
|
2014-03-29 11:18:08 +01:00
|
|
|
|
|
|
|
// All CmpBB instructions are moved into Head, and CmpBB is deleted.
|
|
|
|
// Update the CFG first.
|
|
|
|
updateTailPHIs();
|
2017-06-27 17:00:22 +02:00
|
|
|
|
|
|
|
// Save successor probabilities before removing CmpBB and Tail from their
|
|
|
|
// parents.
|
|
|
|
BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
|
|
|
|
BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
|
|
|
|
|
|
|
|
Head->removeSuccessor(CmpBB);
|
|
|
|
CmpBB->removeSuccessor(Tail);
|
|
|
|
|
|
|
|
// If Head and CmpBB had successor probabilities, update the probabilities to
|
|
|
|
// reflect the ccmp-conversion.
|
|
|
|
if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
|
|
|
|
|
|
|
|
// Head is allowed two successors. We've removed CmpBB, so the remaining
|
|
|
|
// successor is Tail. We need to increase the successor probability for
|
|
|
|
// Tail to account for the CmpBB path we removed.
|
|
|
|
//
|
|
|
|
// Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
|
|
|
|
assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
|
|
|
|
BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
|
|
|
|
Head->setSuccProbability(Head->succ_begin(),
|
|
|
|
Head2Tail + Head2CmpBB * CmpBB2Tail);
|
|
|
|
|
|
|
|
// We will transfer successors of CmpBB to Head in a moment without
|
|
|
|
// normalizing the successor probabilities. Set the successor probabilities
|
|
|
|
// before doing so.
|
|
|
|
//
|
|
|
|
// Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
|
|
|
|
for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
|
|
|
|
BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
|
|
|
|
CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-03-29 11:18:08 +01:00
|
|
|
Head->transferSuccessorsAndUpdatePHIs(CmpBB);
|
|
|
|
DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
|
2016-09-14 22:43:16 +02:00
|
|
|
TII->removeBranch(*Head);
|
2014-03-29 11:18:08 +01:00
|
|
|
|
|
|
|
// If the Head terminator was one of the cbz / tbz branches with built-in
|
|
|
|
// compare, we need to insert an explicit compare instruction in its place.
|
|
|
|
if (HeadCond[0].getImm() == -1) {
|
|
|
|
++NumCompBranches;
|
|
|
|
unsigned Opc = 0;
|
|
|
|
switch (HeadCond[1].getImm()) {
|
2014-05-24 14:50:23 +02:00
|
|
|
case AArch64::CBZW:
|
|
|
|
case AArch64::CBNZW:
|
|
|
|
Opc = AArch64::SUBSWri;
|
2014-03-29 11:18:08 +01:00
|
|
|
break;
|
2014-05-24 14:50:23 +02:00
|
|
|
case AArch64::CBZX:
|
|
|
|
case AArch64::CBNZX:
|
|
|
|
Opc = AArch64::SUBSXri;
|
2014-03-29 11:18:08 +01:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Cannot convert Head branch");
|
|
|
|
}
|
|
|
|
const MCInstrDesc &MCID = TII->get(Opc);
|
|
|
|
// Create a dummy virtual register for the SUBS def.
|
[aarch64] Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM
Summary:
This clang-tidy check is looking for unsigned integer variables whose initializer
starts with an implicit cast from llvm::Register and changes the type of the
variable to llvm::Register (dropping the llvm:: where possible).
Manual fixups in:
AArch64InstrInfo.cpp - genFusedMultiply() now takes a Register* instead of unsigned*
AArch64LoadStoreOptimizer.cpp - Ternary operator was ambiguous between Register/MCRegister. Settled on Register
Depends on D65919
Reviewers: aemerson
Subscribers: jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits
Tags: #llvm
Differential Revision for full review was: https://reviews.llvm.org/D65962
llvm-svn: 368628
2019-08-13 00:40:53 +02:00
|
|
|
Register DestReg =
|
2014-03-29 11:18:08 +01:00
|
|
|
MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
|
|
|
|
// Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
|
|
|
|
BuildMI(*Head, Head->end(), TermDL, MCID)
|
|
|
|
.addReg(DestReg, RegState::Define | RegState::Dead)
|
2017-01-13 10:58:52 +01:00
|
|
|
.add(HeadCond[2])
|
2014-03-29 11:18:08 +01:00
|
|
|
.addImm(0)
|
|
|
|
.addImm(0);
|
|
|
|
// SUBS uses the GPR*sp register classes.
|
|
|
|
MRI->constrainRegClass(HeadCond[2].getReg(),
|
|
|
|
TII->getRegClass(MCID, 1, TRI, *MF));
|
|
|
|
}
|
|
|
|
|
|
|
|
Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
|
|
|
|
|
|
|
|
// Now replace CmpMI with a ccmp instruction that also considers the incoming
|
|
|
|
// flags.
|
|
|
|
unsigned Opc = 0;
|
|
|
|
unsigned FirstOp = 1; // First CmpMI operand to copy.
|
|
|
|
bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
|
|
|
|
switch (CmpMI->getOpcode()) {
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Unknown compare opcode");
|
2014-05-24 14:50:23 +02:00
|
|
|
case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
|
|
|
|
case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
|
|
|
|
case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
|
|
|
|
case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
|
|
|
|
case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
|
|
|
|
case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
|
|
|
|
case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
|
|
|
|
case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
|
|
|
|
case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
|
|
|
|
case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
|
|
|
|
case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
|
|
|
|
case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
|
|
|
|
case AArch64::CBZW:
|
|
|
|
case AArch64::CBNZW:
|
|
|
|
Opc = AArch64::CCMPWi;
|
2014-03-29 11:18:08 +01:00
|
|
|
FirstOp = 0;
|
|
|
|
isZBranch = true;
|
|
|
|
break;
|
2014-05-24 14:50:23 +02:00
|
|
|
case AArch64::CBZX:
|
|
|
|
case AArch64::CBNZX:
|
|
|
|
Opc = AArch64::CCMPXi;
|
2014-03-29 11:18:08 +01:00
|
|
|
FirstOp = 0;
|
|
|
|
isZBranch = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The ccmp instruction should set the flags according to the comparison when
|
|
|
|
// Head would have branched to CmpBB.
|
|
|
|
// The NZCV immediate operand should provide flags for the case where Head
|
|
|
|
// would have branched to Tail. These flags should cause the new Head
|
|
|
|
// terminator to branch to tail.
|
2014-05-24 14:50:23 +02:00
|
|
|
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
|
2014-03-29 11:18:08 +01:00
|
|
|
const MCInstrDesc &MCID = TII->get(Opc);
|
|
|
|
MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
|
|
|
|
TII->getRegClass(MCID, 0, TRI, *MF));
|
|
|
|
if (CmpMI->getOperand(FirstOp + 1).isReg())
|
|
|
|
MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
|
|
|
|
TII->getRegClass(MCID, 1, TRI, *MF));
|
2017-01-13 10:58:52 +01:00
|
|
|
MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
|
|
|
|
.add(CmpMI->getOperand(FirstOp)); // Register Rn
|
2014-03-29 11:18:08 +01:00
|
|
|
if (isZBranch)
|
|
|
|
MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
|
|
|
|
else
|
2017-01-13 10:58:52 +01:00
|
|
|
MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
|
2014-03-29 11:18:08 +01:00
|
|
|
MIB.addImm(NZCV).addImm(HeadCmpBBCC);
|
|
|
|
|
|
|
|
// If CmpMI was a terminator, we need a new conditional branch to replace it.
|
|
|
|
// This now becomes a Head terminator.
|
|
|
|
if (isZBranch) {
|
2014-05-24 14:50:23 +02:00
|
|
|
bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
|
|
|
|
CmpMI->getOpcode() == AArch64::CBNZX;
|
|
|
|
BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
|
|
|
|
.addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
|
2017-01-13 10:58:52 +01:00
|
|
|
.add(CmpMI->getOperand(1)); // Branch target.
|
2014-03-29 11:18:08 +01:00
|
|
|
}
|
|
|
|
CmpMI->eraseFromParent();
|
MachineBasicBlock::updateTerminator now requires an explicit layout successor.
Previously, it tried to infer the correct destination block from the
successor list, but this is a rather tricky prospect, given the
existence of successors that occur mid-block, such as invoke, and
potentially in the future, callbr/INLINEASM_BR. (INLINEASM_BR, in
particular would be problematic, because its successor blocks are not
distinct from "normal" successors, as EHPads are.)
Instead, require the caller to pass in the expected fallthrough
successor explicitly. In most callers, the correct block is
immediately clear. But, in MachineBlockPlacement, we do need to record
the original ordering, before starting to reorder blocks.
Unfortunately, the goal of decoupling the behavior of end-of-block
jumps from the successor list has not been fully accomplished in this
patch, as there is currently no other way to determine whether a block
is intended to fall-through, or end as unreachable. Further work is
needed there.
Differential Revision: https://reviews.llvm.org/D79605
2020-02-19 16:41:28 +01:00
|
|
|
Head->updateTerminator(CmpBB->getNextNode());
|
2014-03-29 11:18:08 +01:00
|
|
|
|
|
|
|
RemovedBlocks.push_back(CmpBB);
|
|
|
|
CmpBB->eraseFromParent();
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Result:\n" << *Head);
|
2014-03-29 11:18:08 +01:00
|
|
|
++NumConverted;
|
|
|
|
}
|
|
|
|
|
|
|
|
int SSACCmpConv::expectedCodeSizeDelta() const {
|
|
|
|
int delta = 0;
|
|
|
|
// If the Head terminator was one of the cbz / tbz branches with built-in
|
|
|
|
// compare, we need to insert an explicit compare instruction in its place
|
|
|
|
// plus a branch instruction.
|
|
|
|
if (HeadCond[0].getImm() == -1) {
|
|
|
|
switch (HeadCond[1].getImm()) {
|
2014-05-24 14:50:23 +02:00
|
|
|
case AArch64::CBZW:
|
|
|
|
case AArch64::CBNZW:
|
|
|
|
case AArch64::CBZX:
|
|
|
|
case AArch64::CBNZX:
|
2014-03-29 11:18:08 +01:00
|
|
|
// Therefore delta += 1
|
|
|
|
delta = 1;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
llvm_unreachable("Cannot convert Head branch");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// If the Cmp terminator was one of the cbz / tbz branches with
|
|
|
|
// built-in compare, it will be turned into a compare instruction
|
|
|
|
// into Head, but we do not save any instruction.
|
|
|
|
// Otherwise, we save the branch instruction.
|
|
|
|
switch (CmpMI->getOpcode()) {
|
|
|
|
default:
|
|
|
|
--delta;
|
|
|
|
break;
|
2014-05-24 14:50:23 +02:00
|
|
|
case AArch64::CBZW:
|
|
|
|
case AArch64::CBNZW:
|
|
|
|
case AArch64::CBZX:
|
|
|
|
case AArch64::CBNZX:
|
2014-03-29 11:18:08 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
return delta;
|
|
|
|
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
2014-05-24 14:50:23 +02:00
|
|
|
// AArch64ConditionalCompares Pass
|
2014-03-29 11:18:08 +01:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
namespace {
|
2014-05-24 14:50:23 +02:00
|
|
|
class AArch64ConditionalCompares : public MachineFunctionPass {
|
2017-06-27 17:00:22 +02:00
|
|
|
const MachineBranchProbabilityInfo *MBPI;
|
2014-03-29 11:18:08 +01:00
|
|
|
const TargetInstrInfo *TII;
|
|
|
|
const TargetRegisterInfo *TRI;
|
2014-09-02 19:43:54 +02:00
|
|
|
MCSchedModel SchedModel;
|
2014-03-29 11:18:08 +01:00
|
|
|
// Does the proceeded function has Oz attribute.
|
|
|
|
bool MinSize;
|
|
|
|
MachineRegisterInfo *MRI;
|
|
|
|
MachineDominatorTree *DomTree;
|
|
|
|
MachineLoopInfo *Loops;
|
|
|
|
MachineTraceMetrics *Traces;
|
|
|
|
MachineTraceMetrics::Ensemble *MinInstr;
|
|
|
|
SSACCmpConv CmpConv;
|
|
|
|
|
|
|
|
public:
|
|
|
|
static char ID;
|
2016-08-01 07:56:57 +02:00
|
|
|
AArch64ConditionalCompares() : MachineFunctionPass(ID) {
|
|
|
|
initializeAArch64ConditionalComparesPass(*PassRegistry::getPassRegistry());
|
|
|
|
}
|
2014-04-29 09:58:25 +02:00
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
2016-10-01 04:56:57 +02:00
|
|
|
StringRef getPassName() const override {
|
2014-05-24 14:50:23 +02:00
|
|
|
return "AArch64 Conditional Compares";
|
2014-04-29 09:58:25 +02:00
|
|
|
}
|
2014-03-29 11:18:08 +01:00
|
|
|
|
|
|
|
private:
|
|
|
|
bool tryConvert(MachineBasicBlock *);
|
|
|
|
void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
|
|
|
|
void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
|
|
|
|
void invalidateTraces();
|
|
|
|
bool shouldConvert();
|
|
|
|
};
|
|
|
|
} // end anonymous namespace
|
|
|
|
|
2014-05-24 14:50:23 +02:00
|
|
|
char AArch64ConditionalCompares::ID = 0;
|
2014-03-29 11:18:08 +01:00
|
|
|
|
2014-05-24 14:50:23 +02:00
|
|
|
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
|
|
|
|
"AArch64 CCMP Pass", false, false)
|
2017-06-27 17:00:22 +02:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
|
2014-03-29 11:18:08 +01:00
|
|
|
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
|
2014-05-24 14:50:23 +02:00
|
|
|
INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
|
|
|
|
"AArch64 CCMP Pass", false, false)
|
2014-03-29 11:18:08 +01:00
|
|
|
|
2014-05-24 14:50:23 +02:00
|
|
|
/// Factory: allocate a new AArch64ConditionalCompares pass and return it to
/// the caller.
FunctionPass *llvm::createAArch64ConditionalCompares() {
  FunctionPass *NewPass = new AArch64ConditionalCompares();
  return NewPass;
}
|
|
|
|
|
2014-05-24 14:50:23 +02:00
|
|
|
/// Declare the analyses this pass needs and the ones it keeps up to date.
void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
  // Only read, never updated by this pass.
  AU.addRequired<MachineBranchProbabilityInfo>();

  // Required and preserved: dominator tree, loop info and trace metrics.
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();

  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();

  AU.addRequired<MachineTraceMetrics>();
  AU.addPreserved<MachineTraceMetrics>();

  MachineFunctionPass::getAnalysisUsage(AU);
}
|
|
|
|
|
|
|
|
/// Update the dominator tree after if-conversion erased some blocks.
|
2014-05-24 14:50:23 +02:00
|
|
|
void AArch64ConditionalCompares::updateDomTree(
|
|
|
|
ArrayRef<MachineBasicBlock *> Removed) {
|
2014-03-29 11:18:08 +01:00
|
|
|
// convert() removes CmpBB which was previously dominated by Head.
|
|
|
|
// CmpBB children should be transferred to Head.
|
|
|
|
MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
|
2015-08-03 21:04:32 +02:00
|
|
|
for (MachineBasicBlock *RemovedMBB : Removed) {
|
|
|
|
MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
|
2014-03-29 11:18:08 +01:00
|
|
|
assert(Node != HeadNode && "Cannot erase the head node");
|
|
|
|
assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
|
|
|
|
while (Node->getNumChildren())
|
DomTree: Remove getChildren() accessor
Summary:
Avoid exposing details about how children are stored. This will enable
subsequent type-erasure changes.
New methods are introduced to cover common access patterns.
Change-Id: Idb5f4b1b9c84e4cc71ddb39bb52a388682f5674f
Reviewers: arsenm, RKSimon, mehdi_amini, courbet
Subscribers: qcolombet, sdardis, wdng, hiraditya, jrtc27, zzheng, atanasyan, asbirlea, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D83083
2020-05-18 16:28:24 +02:00
|
|
|
DomTree->changeImmediateDominator(Node->back(), HeadNode);
|
2015-08-03 21:04:32 +02:00
|
|
|
DomTree->eraseNode(RemovedMBB);
|
2014-03-29 11:18:08 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Update LoopInfo after if-conversion.
|
|
|
|
void
|
2014-05-24 14:50:23 +02:00
|
|
|
AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
|
2014-03-29 11:18:08 +01:00
|
|
|
if (!Loops)
|
|
|
|
return;
|
2015-08-03 21:04:32 +02:00
|
|
|
for (MachineBasicBlock *RemovedMBB : Removed)
|
|
|
|
Loops->removeBlock(RemovedMBB);
|
2014-03-29 11:18:08 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Invalidate MachineTraceMetrics before if-conversion.
|
2014-05-24 14:50:23 +02:00
|
|
|
void AArch64ConditionalCompares::invalidateTraces() {
|
2014-03-29 11:18:08 +01:00
|
|
|
Traces->invalidate(CmpConv.Head);
|
|
|
|
Traces->invalidate(CmpConv.CmpBB);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Apply cost model and heuristics to the if-conversion in IfConv.
|
|
|
|
/// Return true if the conversion is a good idea.
|
|
|
|
///
|
2014-05-24 14:50:23 +02:00
|
|
|
bool AArch64ConditionalCompares::shouldConvert() {
|
2014-03-29 11:18:08 +01:00
|
|
|
// Stress testing mode disables all cost considerations.
|
|
|
|
if (Stress)
|
|
|
|
return true;
|
|
|
|
if (!MinInstr)
|
|
|
|
MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
|
|
|
|
|
|
|
|
// Head dominates CmpBB, so it is always included in its trace.
|
|
|
|
MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
|
|
|
|
|
|
|
|
// If code size is the main concern
|
|
|
|
if (MinSize) {
|
|
|
|
int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
|
2014-03-29 11:18:08 +01:00
|
|
|
// If we are minimizing the code size, do the conversion whatever
|
|
|
|
// the cost is.
|
|
|
|
if (CodeSizeDelta < 0)
|
|
|
|
return true;
|
|
|
|
if (CodeSizeDelta > 0) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
// CodeSizeDelta == 0, continue with the regular heuristics
|
|
|
|
}
|
|
|
|
|
|
|
|
// Heuristic: The compare conversion delays the execution of the branch
|
|
|
|
// instruction because we must wait for the inputs to the second compare as
|
|
|
|
// well. The branch has no dependent instructions, but delaying it increases
|
|
|
|
// the cost of a misprediction.
|
|
|
|
//
|
|
|
|
// Set a limit on the delay we will accept.
|
2014-09-02 19:43:54 +02:00
|
|
|
unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
|
2014-03-29 11:18:08 +01:00
|
|
|
|
|
|
|
// Instruction depths can be computed for all trace instructions above CmpBB.
|
|
|
|
unsigned HeadDepth =
|
2016-02-22 04:33:28 +01:00
|
|
|
Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
|
2014-03-29 11:18:08 +01:00
|
|
|
unsigned CmpBBDepth =
|
2016-02-22 04:33:28 +01:00
|
|
|
Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Head depth: " << HeadDepth
|
|
|
|
<< "\nCmpBB depth: " << CmpBBDepth << '\n');
|
2014-03-29 11:18:08 +01:00
|
|
|
if (CmpBBDepth > HeadDepth + DelayLimit) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
|
|
|
|
<< " cycles.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check the resource depth at the bottom of CmpBB - these instructions will
|
|
|
|
// be speculated.
|
|
|
|
unsigned ResDepth = Trace.getResourceDepth(true);
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
|
2014-03-29 11:18:08 +01:00
|
|
|
|
|
|
|
// Heuristic: The speculatively executed instructions must all be able to
|
|
|
|
// merge into the Head block. The Head critical path should dominate the
|
|
|
|
// resource cost of the speculated instructions.
|
|
|
|
if (ResDepth > HeadDepth) {
|
2018-05-14 14:53:11 +02:00
|
|
|
LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
|
2014-03-29 11:18:08 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-05-24 14:50:23 +02:00
|
|
|
/// Repeatedly convert with \p MBB as the head block for as long as a
/// conversion is both possible and considered profitable.
/// \returns true if at least one conversion was performed.
bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
  bool DidConvert = false;
  for (;;) {
    if (!CmpConv.canConvert(MBB) || !shouldConvert())
      break;

    // Trace data is invalidated before the blocks are modified.
    invalidateTraces();

    SmallVector<MachineBasicBlock *, 4> ErasedBlocks;
    CmpConv.convert(ErasedBlocks);
    DidConvert = true;

    // Keep the preserved analyses consistent with the new CFG.
    updateDomTree(ErasedBlocks);
    updateLoops(ErasedBlocks);
  }
  return DidConvert;
}
|
|
|
|
|
2014-05-24 14:50:23 +02:00
|
|
|
/// Pass entry point: set up the per-function state and try a conversion with
/// every block of \p MF as head.
/// \returns true if the function was modified.
bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
                    << "********** Function: " << MF.getName() << '\n');
  if (skipFunction(MF.getFunction()))
    return false;

  // Target hooks and scheduling model.
  const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
  TII = Subtarget.getInstrInfo();
  TRI = Subtarget.getRegisterInfo();
  SchedModel = Subtarget.getSchedModel();

  // Function state and analyses.
  MRI = &MF.getRegInfo();
  DomTree = &getAnalysis<MachineDominatorTree>();
  Loops = getAnalysisIfAvailable<MachineLoopInfo>();
  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  Traces = &getAnalysis<MachineTraceMetrics>();
  MinInstr = nullptr; // Re-created on demand by shouldConvert().
  MinSize = MF.getFunction().hasMinSize();

  CmpConv.runOnMachineFunction(MF, MBPI);

  // Visit blocks in dominator tree pre-order, which enables multiple
  // cmp-conversions from the same head block.
  // updateDomTree() modifies the children of the DomTree node currently being
  // visited. The df_iterator supports that: it does not look at
  // child_begin() / child_end() until after a node has been visited.
  bool Changed = false;
  for (auto *Node : depth_first(DomTree))
    Changed |= tryConvert(Node->getBlock());

  return Changed;
}
|