1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/lib/Target/BPF/BPFMIPeephole.cpp

464 lines
13 KiB
C++
Raw Normal View History

//===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs peephole optimizations to cleanup ugly code sequences at
// MachineInstruction layer.
//
// Currently, there are two optimizations implemented:
// - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
// zero extend 32-bit subregisters to 64-bit registers, if the compiler
// could prove the subregisters is defined by 32-bit operations in which
// case the upper half of the underlying 64-bit registers were zeroed
// implicitly.
//
// - One post-RA PreEmit pass to do final cleanup on some redundant
// instructions generated due to bad RA on subregister.
//===----------------------------------------------------------------------===//
#include "BPF.h"
#include "BPFInstrInfo.h"
#include "BPFTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "bpf-mi-zext-elim"
STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
namespace {
struct BPFMIPeephole : public MachineFunctionPass {
static char ID;
const BPFInstrInfo *TII;
MachineFunction *MF;
MachineRegisterInfo *MRI;
BPFMIPeephole() : MachineFunctionPass(ID) {
initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
}
private:
// Initialize class variables.
void initialize(MachineFunction &MFParm);
bool isMovFrom32Def(MachineInstr *MovMI);
bool eliminateZExtSeq(void);
public:
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()))
return false;
initialize(MF);
return eliminateZExtSeq();
}
};
// Initialize class variables.
void BPFMIPeephole::initialize(MachineFunction &MFParm) {
MF = &MFParm;
MRI = &MF->getRegInfo();
TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
bpf: fix wrong truncation elimination when there is back-edge/loop Currently, BPF backend is doing truncation elimination. If one truncation is performed on a value defined by narrow loads, then it could be redundant given BPF loads zero extend the destination register implicitly. When the definition of the truncated value is a merging value (PHI node) that could come from different code paths, then checks need to be done on all possible code paths. Above described optimization was introduced as r306685, however it doesn't work when there is back-edge, for example when loop is used inside BPF code. For example for the following code, a zero-extended value should be stored into b[i], but the "and reg, 0xffff" is wrongly eliminated which then generates corrupted data. void cal1(unsigned short *a, unsigned long *b, unsigned int k) { unsigned short e; e = *a; for (unsigned int i = 0; i < k; i++) { b[i] = e; e = ~e; } } The reason is r306685 was trying to do the PHI node checks inside isel DAG2DAG phase, and the checks are done on MachineInstr. This is actually wrong, because MachineInstr is being built during isel phase and the associated information is not completed yet. A quick search shows none target other than BPF is access MachineInstr info during isel phase. For an PHI node, when you reached it during isel phase, it may have all predecessors linked, but not successors. It seems successors are linked to PHI node only when doing SelectionDAGISel::FinishBasicBlock and this happens later than PreprocessISelDAG hook. Previously, BPF program doesn't allow loop, there is probably the reason why this bug was not exposed. This patch therefore fixes the bug by the following approach: - The existing truncation elimination code and the associated "load_to_vreg_" records are removed. - Instead, implement truncation elimination using MachineSSA pass, this is where all information are built, and keep the pass together with other similar peephole optimizations inside BPFMIPeephole.cpp. Redundant move elimination logic is updated accordingly. - Unit testcase included + no compilation errors for kernel BPF selftest. Patch Review === Patch was sent to and reviewed by BPF community at: https://lore.kernel.org/bpf Reported-by: David Beckett <david.beckett@netronome.com> Reviewed-by: Yonghong Song <yhs@fb.com> Signed-off-by: Jiong Wang <jiong.wang@netronome.com> llvm-svn: 375007
2019-10-16 17:27:59 +02:00
LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
}
bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
{
MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
LLVM_DEBUG(dbgs() << " Def of Mov Src:");
LLVM_DEBUG(DefInsn->dump());
if (!DefInsn)
return false;
if (DefInsn->isPHI()) {
for (unsigned i = 1, e = DefInsn->getNumOperands(); i < e; i += 2) {
MachineOperand &opnd = DefInsn->getOperand(i);
if (!opnd.isReg())
return false;
MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
// quick check on PHI incoming definitions.
if (!PhiDef || PhiDef->isPHI() || PhiDef->getOpcode() == BPF::COPY)
return false;
}
}
if (DefInsn->getOpcode() == BPF::COPY) {
MachineOperand &opnd = DefInsn->getOperand(1);
if (!opnd.isReg())
return false;
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM Summary: This clang-tidy check is looking for unsigned integer variables whose initializer starts with an implicit cast from llvm::Register and changes the type of the variable to llvm::Register (dropping the llvm:: where possible). Partial reverts in: X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned& MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register PPCFastISel.cpp - No Register::operator-=() PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned& MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor Manual fixups in: ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned& HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register. PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned& Depends on D65919 Reviewers: arsenm, bogner, craig.topper, RKSimon Reviewed By: arsenm Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65962 llvm-svn: 369041
2019-08-15 21:22:08 +02:00
Register Reg = opnd.getReg();
if ((Register::isVirtualRegister(Reg) &&
MRI->getRegClass(Reg) == &BPF::GPRRegClass))
return false;
}
LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");
return true;
}
bool BPFMIPeephole::eliminateZExtSeq(void) {
MachineInstr* ToErase = nullptr;
bool Eliminated = false;
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
// If the previous instruction was marked for elimination, remove it now.
if (ToErase) {
ToErase->eraseFromParent();
ToErase = nullptr;
}
// Eliminate the 32-bit to 64-bit zero extension sequence when possible.
//
// MOV_32_64 rB, wA
// SLL_ri rB, rB, 32
// SRL_ri rB, rB, 32
if (MI.getOpcode() == BPF::SRL_ri &&
MI.getOperand(2).getImm() == 32) {
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM Summary: This clang-tidy check is looking for unsigned integer variables whose initializer starts with an implicit cast from llvm::Register and changes the type of the variable to llvm::Register (dropping the llvm:: where possible). Partial reverts in: X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned& MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register PPCFastISel.cpp - No Register::operator-=() PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned& MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor Manual fixups in: ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned& HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register. PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned& Depends on D65919 Reviewers: arsenm, bogner, craig.topper, RKSimon Reviewed By: arsenm Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65962 llvm-svn: 369041
2019-08-15 21:22:08 +02:00
Register DstReg = MI.getOperand(0).getReg();
Register ShfReg = MI.getOperand(1).getReg();
MachineInstr *SllMI = MRI->getVRegDef(ShfReg);
LLVM_DEBUG(dbgs() << "Starting SRL found:");
LLVM_DEBUG(MI.dump());
if (!SllMI ||
SllMI->isPHI() ||
SllMI->getOpcode() != BPF::SLL_ri ||
SllMI->getOperand(2).getImm() != 32)
continue;
LLVM_DEBUG(dbgs() << " SLL found:");
LLVM_DEBUG(SllMI->dump());
MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
if (!MovMI ||
MovMI->isPHI() ||
MovMI->getOpcode() != BPF::MOV_32_64)
continue;
LLVM_DEBUG(dbgs() << " Type cast Mov found:");
LLVM_DEBUG(MovMI->dump());
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM Summary: This clang-tidy check is looking for unsigned integer variables whose initializer starts with an implicit cast from llvm::Register and changes the type of the variable to llvm::Register (dropping the llvm:: where possible). Partial reverts in: X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned& MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register PPCFastISel.cpp - No Register::operator-=() PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned& MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor Manual fixups in: ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned& HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register. PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned& Depends on D65919 Reviewers: arsenm, bogner, craig.topper, RKSimon Reviewed By: arsenm Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65962 llvm-svn: 369041
2019-08-15 21:22:08 +02:00
Register SubReg = MovMI->getOperand(1).getReg();
if (!isMovFrom32Def(MovMI)) {
LLVM_DEBUG(dbgs()
<< " One ZExt elim sequence failed qualifying elim.\n");
continue;
}
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
.addImm(0).addReg(SubReg).addImm(BPF::sub_32);
SllMI->eraseFromParent();
MovMI->eraseFromParent();
// MI is the right shift, we can't erase it in it's own iteration.
// Mark it to ToErase, and erase in the next iteration.
ToErase = &MI;
ZExtElemNum++;
Eliminated = true;
}
}
}
return Eliminated;
}
} // end default namespace
INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
bpf: fix wrong truncation elimination when there is back-edge/loop Currently, BPF backend is doing truncation elimination. If one truncation is performed on a value defined by narrow loads, then it could be redundant given BPF loads zero extend the destination register implicitly. When the definition of the truncated value is a merging value (PHI node) that could come from different code paths, then checks need to be done on all possible code paths. Above described optimization was introduced as r306685, however it doesn't work when there is back-edge, for example when loop is used inside BPF code. For example for the following code, a zero-extended value should be stored into b[i], but the "and reg, 0xffff" is wrongly eliminated which then generates corrupted data. void cal1(unsigned short *a, unsigned long *b, unsigned int k) { unsigned short e; e = *a; for (unsigned int i = 0; i < k; i++) { b[i] = e; e = ~e; } } The reason is r306685 was trying to do the PHI node checks inside isel DAG2DAG phase, and the checks are done on MachineInstr. This is actually wrong, because MachineInstr is being built during isel phase and the associated information is not completed yet. A quick search shows none target other than BPF is access MachineInstr info during isel phase. For an PHI node, when you reached it during isel phase, it may have all predecessors linked, but not successors. It seems successors are linked to PHI node only when doing SelectionDAGISel::FinishBasicBlock and this happens later than PreprocessISelDAG hook. Previously, BPF program doesn't allow loop, there is probably the reason why this bug was not exposed. This patch therefore fixes the bug by the following approach: - The existing truncation elimination code and the associated "load_to_vreg_" records are removed. - Instead, implement truncation elimination using MachineSSA pass, this is where all information are built, and keep the pass together with other similar peephole optimizations inside BPFMIPeephole.cpp. Redundant move elimination logic is updated accordingly. - Unit testcase included + no compilation errors for kernel BPF selftest. Patch Review === Patch was sent to and reviewed by BPF community at: https://lore.kernel.org/bpf Reported-by: David Beckett <david.beckett@netronome.com> Reviewed-by: Yonghong Song <yhs@fb.com> Signed-off-by: Jiong Wang <jiong.wang@netronome.com> llvm-svn: 375007
2019-10-16 17:27:59 +02:00
"BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
false, false)
char BPFMIPeephole::ID = 0;
FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");
namespace {
struct BPFMIPreEmitPeephole : public MachineFunctionPass {
static char ID;
MachineFunction *MF;
const TargetRegisterInfo *TRI;
BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
}
private:
// Initialize class variables.
void initialize(MachineFunction &MFParm);
bool eliminateRedundantMov(void);
public:
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()))
return false;
initialize(MF);
return eliminateRedundantMov();
}
};
// Initialize class variables.
void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
MF = &MFParm;
TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
}
bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) {
MachineInstr* ToErase = nullptr;
bool Eliminated = false;
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
// If the previous instruction was marked for elimination, remove it now.
if (ToErase) {
LLVM_DEBUG(dbgs() << " Redundant Mov Eliminated:");
LLVM_DEBUG(ToErase->dump());
ToErase->eraseFromParent();
ToErase = nullptr;
}
// Eliminate identical move:
//
// MOV rA, rA
//
// This is particularly possible to happen when sub-register support
// enabled. The special type cast insn MOV_32_64 involves different
// register class on src (i32) and dst (i64), RA could generate useless
// instruction due to this.
bpf: fix wrong truncation elimination when there is back-edge/loop Currently, BPF backend is doing truncation elimination. If one truncation is performed on a value defined by narrow loads, then it could be redundant given BPF loads zero extend the destination register implicitly. When the definition of the truncated value is a merging value (PHI node) that could come from different code paths, then checks need to be done on all possible code paths. Above described optimization was introduced as r306685, however it doesn't work when there is back-edge, for example when loop is used inside BPF code. For example for the following code, a zero-extended value should be stored into b[i], but the "and reg, 0xffff" is wrongly eliminated which then generates corrupted data. void cal1(unsigned short *a, unsigned long *b, unsigned int k) { unsigned short e; e = *a; for (unsigned int i = 0; i < k; i++) { b[i] = e; e = ~e; } } The reason is r306685 was trying to do the PHI node checks inside isel DAG2DAG phase, and the checks are done on MachineInstr. This is actually wrong, because MachineInstr is being built during isel phase and the associated information is not completed yet. A quick search shows none target other than BPF is access MachineInstr info during isel phase. For an PHI node, when you reached it during isel phase, it may have all predecessors linked, but not successors. It seems successors are linked to PHI node only when doing SelectionDAGISel::FinishBasicBlock and this happens later than PreprocessISelDAG hook. Previously, BPF program doesn't allow loop, there is probably the reason why this bug was not exposed. This patch therefore fixes the bug by the following approach: - The existing truncation elimination code and the associated "load_to_vreg_" records are removed. - Instead, implement truncation elimination using MachineSSA pass, this is where all information are built, and keep the pass together with other similar peephole optimizations inside BPFMIPeephole.cpp. Redundant move elimination logic is updated accordingly. - Unit testcase included + no compilation errors for kernel BPF selftest. Patch Review === Patch was sent to and reviewed by BPF community at: https://lore.kernel.org/bpf Reported-by: David Beckett <david.beckett@netronome.com> Reviewed-by: Yonghong Song <yhs@fb.com> Signed-off-by: Jiong Wang <jiong.wang@netronome.com> llvm-svn: 375007
2019-10-16 17:27:59 +02:00
unsigned Opcode = MI.getOpcode();
if (Opcode == BPF::MOV_32_64 ||
Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) {
Apply llvm-prefer-register-over-unsigned from clang-tidy to LLVM Summary: This clang-tidy check is looking for unsigned integer variables whose initializer starts with an implicit cast from llvm::Register and changes the type of the variable to llvm::Register (dropping the llvm:: where possible). Partial reverts in: X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister X86FixupLEAs.cpp - Some functions return unsigned and arguably should be MCRegister X86FrameLowering.cpp - Some functions return unsigned and arguably should be MCRegister HexagonBitSimplify.cpp - Function takes BitTracker::RegisterRef which appears to be unsigned& MachineVerifier.cpp - Ambiguous operator==() given MCRegister and const Register PPCFastISel.cpp - No Register::operator-=() PeepholeOptimizer.cpp - TargetInstrInfo::optimizeLoadInstr() takes an unsigned& MachineTraceMetrics.cpp - MachineTraceMetrics lacks a suitable constructor Manual fixups in: ARMFastISel.cpp - ARMEmitLoad() now takes a Register& instead of unsigned& HexagonSplitDouble.cpp - Ternary operator was ambiguous between unsigned/Register HexagonConstExtenders.cpp - Has a local class named Register, used llvm::Register instead of Register. PPCFastISel.cpp - PPCEmitLoad() now takes a Register& instead of unsigned& Depends on D65919 Reviewers: arsenm, bogner, craig.topper, RKSimon Reviewed By: arsenm Subscribers: RKSimon, craig.topper, lenary, aemerson, wuzish, jholewinski, MatzeB, qcolombet, dschuff, jyknight, dylanmckay, sdardis, nemanjai, jvesely, wdng, nhaehnle, sbc100, jgravelle-google, kristof.beyls, hiraditya, aheejin, kbarton, fedor.sergeev, javed.absar, asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, MaskRay, zzheng, edward-jones, atanasyan, rogfer01, MartinMosbeck, brucehoult, the_o, tpr, PkmX, jocewei, jsji, Petar.Avramovic, asbirlea, Jim, s.egerton, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65962 llvm-svn: 369041
2019-08-15 21:22:08 +02:00
Register dst = MI.getOperand(0).getReg();
Register src = MI.getOperand(1).getReg();
bpf: fix wrong truncation elimination when there is back-edge/loop Currently, BPF backend is doing truncation elimination. If one truncation is performed on a value defined by narrow loads, then it could be redundant given BPF loads zero extend the destination register implicitly. When the definition of the truncated value is a merging value (PHI node) that could come from different code paths, then checks need to be done on all possible code paths. Above described optimization was introduced as r306685, however it doesn't work when there is back-edge, for example when loop is used inside BPF code. For example for the following code, a zero-extended value should be stored into b[i], but the "and reg, 0xffff" is wrongly eliminated which then generates corrupted data. void cal1(unsigned short *a, unsigned long *b, unsigned int k) { unsigned short e; e = *a; for (unsigned int i = 0; i < k; i++) { b[i] = e; e = ~e; } } The reason is r306685 was trying to do the PHI node checks inside isel DAG2DAG phase, and the checks are done on MachineInstr. This is actually wrong, because MachineInstr is being built during isel phase and the associated information is not completed yet. A quick search shows none target other than BPF is access MachineInstr info during isel phase. For an PHI node, when you reached it during isel phase, it may have all predecessors linked, but not successors. It seems successors are linked to PHI node only when doing SelectionDAGISel::FinishBasicBlock and this happens later than PreprocessISelDAG hook. Previously, BPF program doesn't allow loop, there is probably the reason why this bug was not exposed. This patch therefore fixes the bug by the following approach: - The existing truncation elimination code and the associated "load_to_vreg_" records are removed. - Instead, implement truncation elimination using MachineSSA pass, this is where all information are built, and keep the pass together with other similar peephole optimizations inside BPFMIPeephole.cpp. Redundant move elimination logic is updated accordingly. - Unit testcase included + no compilation errors for kernel BPF selftest. Patch Review === Patch was sent to and reviewed by BPF community at: https://lore.kernel.org/bpf Reported-by: David Beckett <david.beckett@netronome.com> Reviewed-by: Yonghong Song <yhs@fb.com> Signed-off-by: Jiong Wang <jiong.wang@netronome.com> llvm-svn: 375007
2019-10-16 17:27:59 +02:00
if (Opcode == BPF::MOV_32_64)
dst = TRI->getSubReg(dst, BPF::sub_32);
if (dst != src)
continue;
ToErase = &MI;
RedundantMovElemNum++;
Eliminated = true;
}
}
}
return Eliminated;
}
} // end default namespace
INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
"BPF PreEmit Peephole Optimization", false, false)
char BPFMIPreEmitPeephole::ID = 0;
FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
{
return new BPFMIPreEmitPeephole();
}
bpf: fix wrong truncation elimination when there is back-edge/loop Currently, BPF backend is doing truncation elimination. If one truncation is performed on a value defined by narrow loads, then it could be redundant given BPF loads zero extend the destination register implicitly. When the definition of the truncated value is a merging value (PHI node) that could come from different code paths, then checks need to be done on all possible code paths. Above described optimization was introduced as r306685, however it doesn't work when there is back-edge, for example when loop is used inside BPF code. For example for the following code, a zero-extended value should be stored into b[i], but the "and reg, 0xffff" is wrongly eliminated which then generates corrupted data. void cal1(unsigned short *a, unsigned long *b, unsigned int k) { unsigned short e; e = *a; for (unsigned int i = 0; i < k; i++) { b[i] = e; e = ~e; } } The reason is r306685 was trying to do the PHI node checks inside isel DAG2DAG phase, and the checks are done on MachineInstr. This is actually wrong, because MachineInstr is being built during isel phase and the associated information is not completed yet. A quick search shows none target other than BPF is access MachineInstr info during isel phase. For an PHI node, when you reached it during isel phase, it may have all predecessors linked, but not successors. It seems successors are linked to PHI node only when doing SelectionDAGISel::FinishBasicBlock and this happens later than PreprocessISelDAG hook. Previously, BPF program doesn't allow loop, there is probably the reason why this bug was not exposed. This patch therefore fixes the bug by the following approach: - The existing truncation elimination code and the associated "load_to_vreg_" records are removed. - Instead, implement truncation elimination using MachineSSA pass, this is where all information are built, and keep the pass together with other similar peephole optimizations inside BPFMIPeephole.cpp. Redundant move elimination logic is updated accordingly. - Unit testcase included + no compilation errors for kernel BPF selftest. Patch Review === Patch was sent to and reviewed by BPF community at: https://lore.kernel.org/bpf Reported-by: David Beckett <david.beckett@netronome.com> Reviewed-by: Yonghong Song <yhs@fb.com> Signed-off-by: Jiong Wang <jiong.wang@netronome.com> llvm-svn: 375007
2019-10-16 17:27:59 +02:00
STATISTIC(TruncElemNum, "Number of truncation eliminated");
namespace {
struct BPFMIPeepholeTruncElim : public MachineFunctionPass {
static char ID;
const BPFInstrInfo *TII;
MachineFunction *MF;
MachineRegisterInfo *MRI;
BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) {
initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry());
}
private:
// Initialize class variables.
void initialize(MachineFunction &MFParm);
bool eliminateTruncSeq(void);
public:
// Main entry point for this pass.
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()))
return false;
initialize(MF);
return eliminateTruncSeq();
}
};
static bool TruncSizeCompatible(int TruncSize, unsigned opcode)
{
if (TruncSize == 1)
return opcode == BPF::LDB || opcode == BPF::LDB32;
if (TruncSize == 2)
return opcode == BPF::LDH || opcode == BPF::LDH32;
if (TruncSize == 4)
return opcode == BPF::LDW || opcode == BPF::LDW32;
return false;
}
// Initialize class variables.
void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) {
MF = &MFParm;
MRI = &MF->getRegInfo();
TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n");
}
// Reg truncating is often the result of 8/16/32bit->64bit or
// 8/16bit->32bit conversion. If the reg value is loaded with
// masked byte width, the AND operation can be removed since
// BPF LOAD already has zero extension.
//
// This also solved a correctness issue.
// In BPF socket-related program, e.g., __sk_buff->{data, data_end}
// are 32-bit registers, but later on, kernel verifier will rewrite
// it with 64-bit value. Therefore, truncating the value after the
// load will result in incorrect code.
bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) {
MachineInstr* ToErase = nullptr;
bool Eliminated = false;
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &MI : MBB) {
// The second insn to remove if the eliminate candidate is a pair.
MachineInstr *MI2 = nullptr;
Register DstReg, SrcReg;
MachineInstr *DefMI;
int TruncSize = -1;
// If the previous instruction was marked for elimination, remove it now.
if (ToErase) {
ToErase->eraseFromParent();
ToErase = nullptr;
}
// AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate
// for BPF ANDI is i32, and this case only happens on ALU64.
if (MI.getOpcode() == BPF::SRL_ri &&
MI.getOperand(2).getImm() == 32) {
SrcReg = MI.getOperand(1).getReg();
MI2 = MRI->getVRegDef(SrcReg);
DstReg = MI.getOperand(0).getReg();
if (!MI2 ||
MI2->getOpcode() != BPF::SLL_ri ||
MI2->getOperand(2).getImm() != 32)
continue;
// Update SrcReg.
SrcReg = MI2->getOperand(1).getReg();
DefMI = MRI->getVRegDef(SrcReg);
if (DefMI)
TruncSize = 4;
} else if (MI.getOpcode() == BPF::AND_ri ||
MI.getOpcode() == BPF::AND_ri_32) {
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
DefMI = MRI->getVRegDef(SrcReg);
if (!DefMI)
continue;
int64_t imm = MI.getOperand(2).getImm();
if (imm == 0xff)
TruncSize = 1;
else if (imm == 0xffff)
TruncSize = 2;
}
if (TruncSize == -1)
continue;
// The definition is PHI node, check all inputs.
if (DefMI->isPHI()) {
bool CheckFail = false;
for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
MachineOperand &opnd = DefMI->getOperand(i);
if (!opnd.isReg()) {
CheckFail = true;
break;
}
MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
if (!PhiDef || PhiDef->isPHI() ||
!TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) {
CheckFail = true;
break;
}
}
if (CheckFail)
continue;
} else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) {
continue;
}
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg)
.addReg(SrcReg);
if (MI2)
MI2->eraseFromParent();
// Mark it to ToErase, and erase in the next iteration.
ToErase = &MI;
TruncElemNum++;
Eliminated = true;
}
}
return Eliminated;
}
} // end default namespace
INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim",
"BPF MachineSSA Peephole Optimization For TRUNC Eliminate",
false, false)
char BPFMIPeepholeTruncElim::ID = 0;
FunctionPass* llvm::createBPFMIPeepholeTruncElimPass()
{
return new BPFMIPeepholeTruncElim();
}