mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
0cfb2248a6
Commit 13f6c81c5d9a ("[BPF] simplify zero extension with MOV_32_64") tried to use MOV_32_64 instructions instead of lshift/rshift instructions for zero extension. This has the benefit to remove the number of instructions and may help verifier too. But the same commit also removed the old MOV_32_64 pruning as it deems unsafe as MOV_32_64 does have the side effect, zeroing out the top 32bit in the register. This caused the following failure in kernel selftest test_cls_redirect.o. In linux kernel, we have struct __sk_buff { __u32 data; __u32 data_end; }; The compiler will generate 32bit load for __sk_buff->data and __sk_buff->data_end. But kernel verifier will actually loads an address (64bit address on 64bit kernel) to the result register. In this particular example, the explicit zext was not optimized away and destroyed top 32bit address and the verifier rejected the program : w2 = *(u32 *)(r1 + 76) ... r2 = w2 /* MOV_32_64: this will clear top 32bit */ Currently, if the load and the zext are next to each other, the instruction pattern match can actually capture this to avoid MOV_32_64, e.g., in BPFInstrInfo.td, we have def : Pat<(i64 (zextloadi32 ADDRri:$src)), (SUBREG_TO_REG (i64 0), (LDW32 ADDRri:$src), sub_32)>; However, if they are not next to each other, LDW32 and MOV_32_64 are generated, which may cause the above mentioned problem. BPF Backend already tried to optimize away pattern mov_32_64 + lshift + rshift Commit 13f6c81c5d9a may generate mov_32_64 not followed by shifts. This patch added optimization for only mov_32_64 too. Differential Revision: https://reviews.llvm.org/D81048
562 lines
15 KiB
C++
562 lines
15 KiB
C++
//===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This pass performs peephole optimizations to cleanup ugly code sequences at
|
|
// MachineInstruction layer.
|
|
//
|
|
// Currently, there are two optimizations implemented:
|
|
// - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
|
|
// zero extend 32-bit subregisters to 64-bit registers, if the compiler
|
|
// could prove the subregisters is defined by 32-bit operations in which
|
|
// case the upper half of the underlying 64-bit registers were zeroed
|
|
// implicitly.
|
|
//
|
|
// - One post-RA PreEmit pass to do final cleanup on some redundant
|
|
// instructions generated due to bad RA on subregister.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "BPF.h"
|
|
#include "BPFInstrInfo.h"
|
|
#include "BPFTargetMachine.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include <set>
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "bpf-mi-zext-elim"
|
|
|
|
STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
|
|
|
|
namespace {
|
|
|
|
struct BPFMIPeephole : public MachineFunctionPass {
|
|
|
|
static char ID;
|
|
const BPFInstrInfo *TII;
|
|
MachineFunction *MF;
|
|
MachineRegisterInfo *MRI;
|
|
|
|
BPFMIPeephole() : MachineFunctionPass(ID) {
|
|
initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
private:
|
|
// Initialize class variables.
|
|
void initialize(MachineFunction &MFParm);
|
|
|
|
bool isCopyFrom32Def(MachineInstr *CopyMI);
|
|
bool isInsnFrom32Def(MachineInstr *DefInsn);
|
|
bool isPhiFrom32Def(MachineInstr *MovMI);
|
|
bool isMovFrom32Def(MachineInstr *MovMI);
|
|
bool eliminateZExtSeq(void);
|
|
bool eliminateZExt(void);
|
|
|
|
std::set<MachineInstr *> PhiInsns;
|
|
|
|
public:
|
|
|
|
// Main entry point for this pass.
|
|
bool runOnMachineFunction(MachineFunction &MF) override {
|
|
if (skipFunction(MF.getFunction()))
|
|
return false;
|
|
|
|
initialize(MF);
|
|
|
|
// First try to eliminate (zext, lshift, rshift) and then
|
|
// try to eliminate zext.
|
|
bool ZExtSeqExist, ZExtExist;
|
|
ZExtSeqExist = eliminateZExtSeq();
|
|
ZExtExist = eliminateZExt();
|
|
return ZExtSeqExist || ZExtExist;
|
|
}
|
|
};
|
|
|
|
// Initialize class variables.
|
|
void BPFMIPeephole::initialize(MachineFunction &MFParm) {
|
|
MF = &MFParm;
|
|
MRI = &MF->getRegInfo();
|
|
TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
|
|
LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
|
|
}
|
|
|
|
bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
|
|
{
|
|
MachineOperand &opnd = CopyMI->getOperand(1);
|
|
|
|
if (!opnd.isReg())
|
|
return false;
|
|
|
|
// Return false if getting value from a 32bit physical register.
|
|
// Most likely, this physical register is aliased to
|
|
// function call return value or current function parameters.
|
|
Register Reg = opnd.getReg();
|
|
if (!Register::isVirtualRegister(Reg))
|
|
return false;
|
|
|
|
if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
|
|
return false;
|
|
|
|
MachineInstr *DefInsn = MRI->getVRegDef(Reg);
|
|
if (!isInsnFrom32Def(DefInsn))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
|
|
{
|
|
for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
|
|
MachineOperand &opnd = PhiMI->getOperand(i);
|
|
|
|
if (!opnd.isReg())
|
|
return false;
|
|
|
|
MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
|
|
if (!PhiDef)
|
|
return false;
|
|
if (PhiDef->isPHI()) {
|
|
if (PhiInsns.find(PhiDef) != PhiInsns.end())
|
|
return false;
|
|
PhiInsns.insert(PhiDef);
|
|
if (!isPhiFrom32Def(PhiDef))
|
|
return false;
|
|
}
|
|
if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// The \p DefInsn instruction defines a virtual register.
|
|
bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
|
|
{
|
|
if (!DefInsn)
|
|
return false;
|
|
|
|
if (DefInsn->isPHI()) {
|
|
if (PhiInsns.find(DefInsn) != PhiInsns.end())
|
|
return false;
|
|
PhiInsns.insert(DefInsn);
|
|
if (!isPhiFrom32Def(DefInsn))
|
|
return false;
|
|
} else if (DefInsn->getOpcode() == BPF::COPY) {
|
|
if (!isCopyFrom32Def(DefInsn))
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
|
|
{
|
|
MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
|
|
|
|
LLVM_DEBUG(dbgs() << " Def of Mov Src:");
|
|
LLVM_DEBUG(DefInsn->dump());
|
|
|
|
PhiInsns.clear();
|
|
if (!isInsnFrom32Def(DefInsn))
|
|
return false;
|
|
|
|
LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");
|
|
|
|
return true;
|
|
}
|
|
|
|
bool BPFMIPeephole::eliminateZExtSeq(void) {
|
|
MachineInstr* ToErase = nullptr;
|
|
bool Eliminated = false;
|
|
|
|
for (MachineBasicBlock &MBB : *MF) {
|
|
for (MachineInstr &MI : MBB) {
|
|
// If the previous instruction was marked for elimination, remove it now.
|
|
if (ToErase) {
|
|
ToErase->eraseFromParent();
|
|
ToErase = nullptr;
|
|
}
|
|
|
|
// Eliminate the 32-bit to 64-bit zero extension sequence when possible.
|
|
//
|
|
// MOV_32_64 rB, wA
|
|
// SLL_ri rB, rB, 32
|
|
// SRL_ri rB, rB, 32
|
|
if (MI.getOpcode() == BPF::SRL_ri &&
|
|
MI.getOperand(2).getImm() == 32) {
|
|
Register DstReg = MI.getOperand(0).getReg();
|
|
Register ShfReg = MI.getOperand(1).getReg();
|
|
MachineInstr *SllMI = MRI->getVRegDef(ShfReg);
|
|
|
|
LLVM_DEBUG(dbgs() << "Starting SRL found:");
|
|
LLVM_DEBUG(MI.dump());
|
|
|
|
if (!SllMI ||
|
|
SllMI->isPHI() ||
|
|
SllMI->getOpcode() != BPF::SLL_ri ||
|
|
SllMI->getOperand(2).getImm() != 32)
|
|
continue;
|
|
|
|
LLVM_DEBUG(dbgs() << " SLL found:");
|
|
LLVM_DEBUG(SllMI->dump());
|
|
|
|
MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
|
|
if (!MovMI ||
|
|
MovMI->isPHI() ||
|
|
MovMI->getOpcode() != BPF::MOV_32_64)
|
|
continue;
|
|
|
|
LLVM_DEBUG(dbgs() << " Type cast Mov found:");
|
|
LLVM_DEBUG(MovMI->dump());
|
|
|
|
Register SubReg = MovMI->getOperand(1).getReg();
|
|
if (!isMovFrom32Def(MovMI)) {
|
|
LLVM_DEBUG(dbgs()
|
|
<< " One ZExt elim sequence failed qualifying elim.\n");
|
|
continue;
|
|
}
|
|
|
|
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
|
|
.addImm(0).addReg(SubReg).addImm(BPF::sub_32);
|
|
|
|
SllMI->eraseFromParent();
|
|
MovMI->eraseFromParent();
|
|
// MI is the right shift, we can't erase it in it's own iteration.
|
|
// Mark it to ToErase, and erase in the next iteration.
|
|
ToErase = &MI;
|
|
ZExtElemNum++;
|
|
Eliminated = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return Eliminated;
|
|
}
|
|
|
|
bool BPFMIPeephole::eliminateZExt(void) {
|
|
MachineInstr* ToErase = nullptr;
|
|
bool Eliminated = false;
|
|
|
|
for (MachineBasicBlock &MBB : *MF) {
|
|
for (MachineInstr &MI : MBB) {
|
|
// If the previous instruction was marked for elimination, remove it now.
|
|
if (ToErase) {
|
|
ToErase->eraseFromParent();
|
|
ToErase = nullptr;
|
|
}
|
|
|
|
if (MI.getOpcode() != BPF::MOV_32_64)
|
|
continue;
|
|
|
|
// Eliminate MOV_32_64 if possible.
|
|
// MOV_32_64 rA, wB
|
|
//
|
|
// If wB has been zero extended, replace it with a SUBREG_TO_REG.
|
|
// This is to workaround BPF programs where pkt->{data, data_end}
|
|
// is encoded as u32, but actually the verifier populates them
|
|
// as 64bit pointer. The MOV_32_64 will zero out the top 32 bits.
|
|
LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
|
|
LLVM_DEBUG(MI.dump());
|
|
|
|
if (!isMovFrom32Def(&MI))
|
|
continue;
|
|
|
|
LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");
|
|
|
|
Register dst = MI.getOperand(0).getReg();
|
|
Register src = MI.getOperand(1).getReg();
|
|
|
|
// Build a SUBREG_TO_REG instruction.
|
|
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst)
|
|
.addImm(0).addReg(src).addImm(BPF::sub_32);
|
|
|
|
ToErase = &MI;
|
|
Eliminated = true;
|
|
}
|
|
}
|
|
|
|
return Eliminated;
|
|
}
|
|
|
|
} // end default namespace
|
|
|
|
INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
|
|
"BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
|
|
false, false)
|
|
|
|
char BPFMIPeephole::ID = 0;
|
|
FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
|
|
|
|
STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");
|
|
|
|
namespace {
|
|
|
|
struct BPFMIPreEmitPeephole : public MachineFunctionPass {
|
|
|
|
static char ID;
|
|
MachineFunction *MF;
|
|
const TargetRegisterInfo *TRI;
|
|
|
|
BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
|
|
initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
private:
|
|
// Initialize class variables.
|
|
void initialize(MachineFunction &MFParm);
|
|
|
|
bool eliminateRedundantMov(void);
|
|
|
|
public:
|
|
|
|
// Main entry point for this pass.
|
|
bool runOnMachineFunction(MachineFunction &MF) override {
|
|
if (skipFunction(MF.getFunction()))
|
|
return false;
|
|
|
|
initialize(MF);
|
|
|
|
return eliminateRedundantMov();
|
|
}
|
|
};
|
|
|
|
// Initialize class variables.
|
|
void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
|
|
MF = &MFParm;
|
|
TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
|
|
LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
|
|
}
|
|
|
|
bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) {
|
|
MachineInstr* ToErase = nullptr;
|
|
bool Eliminated = false;
|
|
|
|
for (MachineBasicBlock &MBB : *MF) {
|
|
for (MachineInstr &MI : MBB) {
|
|
// If the previous instruction was marked for elimination, remove it now.
|
|
if (ToErase) {
|
|
LLVM_DEBUG(dbgs() << " Redundant Mov Eliminated:");
|
|
LLVM_DEBUG(ToErase->dump());
|
|
ToErase->eraseFromParent();
|
|
ToErase = nullptr;
|
|
}
|
|
|
|
// Eliminate identical move:
|
|
//
|
|
// MOV rA, rA
|
|
//
|
|
// Note that we cannot remove
|
|
// MOV_32_64 rA, wA
|
|
// MOV_rr_32 wA, wA
|
|
// as these two instructions having side effects, zeroing out
|
|
// top 32 bits of rA.
|
|
unsigned Opcode = MI.getOpcode();
|
|
if (Opcode == BPF::MOV_rr) {
|
|
Register dst = MI.getOperand(0).getReg();
|
|
Register src = MI.getOperand(1).getReg();
|
|
|
|
if (dst != src)
|
|
continue;
|
|
|
|
ToErase = &MI;
|
|
RedundantMovElemNum++;
|
|
Eliminated = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
return Eliminated;
|
|
}
|
|
|
|
} // end default namespace
|
|
|
|
INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
|
|
"BPF PreEmit Peephole Optimization", false, false)
|
|
|
|
char BPFMIPreEmitPeephole::ID = 0;
|
|
FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
|
|
{
|
|
return new BPFMIPreEmitPeephole();
|
|
}
|
|
|
|
STATISTIC(TruncElemNum, "Number of truncation eliminated");
|
|
|
|
namespace {
|
|
|
|
struct BPFMIPeepholeTruncElim : public MachineFunctionPass {
|
|
|
|
static char ID;
|
|
const BPFInstrInfo *TII;
|
|
MachineFunction *MF;
|
|
MachineRegisterInfo *MRI;
|
|
|
|
BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) {
|
|
initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
private:
|
|
// Initialize class variables.
|
|
void initialize(MachineFunction &MFParm);
|
|
|
|
bool eliminateTruncSeq(void);
|
|
|
|
public:
|
|
|
|
// Main entry point for this pass.
|
|
bool runOnMachineFunction(MachineFunction &MF) override {
|
|
if (skipFunction(MF.getFunction()))
|
|
return false;
|
|
|
|
initialize(MF);
|
|
|
|
return eliminateTruncSeq();
|
|
}
|
|
};
|
|
|
|
static bool TruncSizeCompatible(int TruncSize, unsigned opcode)
|
|
{
|
|
if (TruncSize == 1)
|
|
return opcode == BPF::LDB || opcode == BPF::LDB32;
|
|
|
|
if (TruncSize == 2)
|
|
return opcode == BPF::LDH || opcode == BPF::LDH32;
|
|
|
|
if (TruncSize == 4)
|
|
return opcode == BPF::LDW || opcode == BPF::LDW32;
|
|
|
|
return false;
|
|
}
|
|
|
|
// Initialize class variables.
|
|
void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) {
|
|
MF = &MFParm;
|
|
MRI = &MF->getRegInfo();
|
|
TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
|
|
LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n");
|
|
}
|
|
|
|
// Reg truncating is often the result of 8/16/32bit->64bit or
|
|
// 8/16bit->32bit conversion. If the reg value is loaded with
|
|
// masked byte width, the AND operation can be removed since
|
|
// BPF LOAD already has zero extension.
|
|
//
|
|
// This also solved a correctness issue.
|
|
// In BPF socket-related program, e.g., __sk_buff->{data, data_end}
|
|
// are 32-bit registers, but later on, kernel verifier will rewrite
|
|
// it with 64-bit value. Therefore, truncating the value after the
|
|
// load will result in incorrect code.
|
|
bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) {
|
|
MachineInstr* ToErase = nullptr;
|
|
bool Eliminated = false;
|
|
|
|
for (MachineBasicBlock &MBB : *MF) {
|
|
for (MachineInstr &MI : MBB) {
|
|
// The second insn to remove if the eliminate candidate is a pair.
|
|
MachineInstr *MI2 = nullptr;
|
|
Register DstReg, SrcReg;
|
|
MachineInstr *DefMI;
|
|
int TruncSize = -1;
|
|
|
|
// If the previous instruction was marked for elimination, remove it now.
|
|
if (ToErase) {
|
|
ToErase->eraseFromParent();
|
|
ToErase = nullptr;
|
|
}
|
|
|
|
// AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate
|
|
// for BPF ANDI is i32, and this case only happens on ALU64.
|
|
if (MI.getOpcode() == BPF::SRL_ri &&
|
|
MI.getOperand(2).getImm() == 32) {
|
|
SrcReg = MI.getOperand(1).getReg();
|
|
MI2 = MRI->getVRegDef(SrcReg);
|
|
DstReg = MI.getOperand(0).getReg();
|
|
|
|
if (!MI2 ||
|
|
MI2->getOpcode() != BPF::SLL_ri ||
|
|
MI2->getOperand(2).getImm() != 32)
|
|
continue;
|
|
|
|
// Update SrcReg.
|
|
SrcReg = MI2->getOperand(1).getReg();
|
|
DefMI = MRI->getVRegDef(SrcReg);
|
|
if (DefMI)
|
|
TruncSize = 4;
|
|
} else if (MI.getOpcode() == BPF::AND_ri ||
|
|
MI.getOpcode() == BPF::AND_ri_32) {
|
|
SrcReg = MI.getOperand(1).getReg();
|
|
DstReg = MI.getOperand(0).getReg();
|
|
DefMI = MRI->getVRegDef(SrcReg);
|
|
|
|
if (!DefMI)
|
|
continue;
|
|
|
|
int64_t imm = MI.getOperand(2).getImm();
|
|
if (imm == 0xff)
|
|
TruncSize = 1;
|
|
else if (imm == 0xffff)
|
|
TruncSize = 2;
|
|
}
|
|
|
|
if (TruncSize == -1)
|
|
continue;
|
|
|
|
// The definition is PHI node, check all inputs.
|
|
if (DefMI->isPHI()) {
|
|
bool CheckFail = false;
|
|
|
|
for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
|
|
MachineOperand &opnd = DefMI->getOperand(i);
|
|
if (!opnd.isReg()) {
|
|
CheckFail = true;
|
|
break;
|
|
}
|
|
|
|
MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
|
|
if (!PhiDef || PhiDef->isPHI() ||
|
|
!TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) {
|
|
CheckFail = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (CheckFail)
|
|
continue;
|
|
} else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) {
|
|
continue;
|
|
}
|
|
|
|
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg)
|
|
.addReg(SrcReg);
|
|
|
|
if (MI2)
|
|
MI2->eraseFromParent();
|
|
|
|
// Mark it to ToErase, and erase in the next iteration.
|
|
ToErase = &MI;
|
|
TruncElemNum++;
|
|
Eliminated = true;
|
|
}
|
|
}
|
|
|
|
return Eliminated;
|
|
}
|
|
|
|
} // end default namespace
|
|
|
|
INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim",
|
|
"BPF MachineSSA Peephole Optimization For TRUNC Eliminate",
|
|
false, false)
|
|
|
|
char BPFMIPeepholeTruncElim::ID = 0;
|
|
FunctionPass* llvm::createBPFMIPeepholeTruncElimPass()
|
|
{
|
|
return new BPFMIPeepholeTruncElim();
|
|
}
|