mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[SystemZ] Generate XC loop for memset 0 of variable length.
Benchmarking has shown that it is worthwhile to implement a variable length memset of 0 with XC (exclusive or) like gcc does, instead of using a libcall. This requires the use of the EXecute Relative Long (EXRL) instruction which can now be done in a framework that can also be used with other target instructions (not just XC). Review: Ulrich Weigand Differential Revision: https://reviews.llvm.org/D103865
This commit is contained in:
parent
28343f1971
commit
f20b0fa5e7
@ -541,6 +541,30 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
|
|||||||
LowerPATCHPOINT(*MI, Lower);
|
LowerPATCHPOINT(*MI, Lower);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
case SystemZ::EXRL_Pseudo: {
|
||||||
|
unsigned TargetInsOpc = MI->getOperand(0).getImm();
|
||||||
|
Register LenMinus1Reg = MI->getOperand(1).getReg();
|
||||||
|
Register DestReg = MI->getOperand(2).getReg();
|
||||||
|
int64_t DestDisp = MI->getOperand(3).getImm();
|
||||||
|
Register SrcReg = MI->getOperand(4).getReg();
|
||||||
|
int64_t SrcDisp = MI->getOperand(5).getImm();
|
||||||
|
|
||||||
|
MCSymbol *DotSym = nullptr;
|
||||||
|
MCInst ET = MCInstBuilder(TargetInsOpc).addReg(DestReg)
|
||||||
|
.addImm(DestDisp).addImm(1).addReg(SrcReg).addImm(SrcDisp);
|
||||||
|
MCInstSTIPair ET_STI(ET, &MF->getSubtarget());
|
||||||
|
EXRLT2SymMap::iterator I = EXRLTargets2Sym.find(ET_STI);
|
||||||
|
if (I != EXRLTargets2Sym.end())
|
||||||
|
DotSym = I->second;
|
||||||
|
else
|
||||||
|
EXRLTargets2Sym[ET_STI] = DotSym = OutContext.createTempSymbol();
|
||||||
|
const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext);
|
||||||
|
EmitToStreamer(
|
||||||
|
*OutStreamer,
|
||||||
|
MCInstBuilder(SystemZ::EXRL).addReg(LenMinus1Reg).addExpr(Dot));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
Lower.lower(MI, LoweredMI);
|
Lower.lower(MI, LoweredMI);
|
||||||
break;
|
break;
|
||||||
@ -698,6 +722,19 @@ void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
|
|||||||
getSubtargetInfo());
|
getSubtargetInfo());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emit all EXRL target instructions recorded in EXRLTargets2Sym.
// Each target is emitted in the text section, preceded by the label stored
// with it in the map, and the map is emptied afterwards. Does nothing if no
// targets were recorded.
void SystemZAsmPrinter::emitEXRLTargetInstructions() {
|
||||||
|
// Nothing recorded: return before switching sections.
if (EXRLTargets2Sym.empty())
|
||||||
|
return;
|
||||||
|
// Switch to the .text section.
|
||||||
|
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
|
||||||
|
// Map entries are (target MCInst, subtarget) -> label symbol.
for (auto &I : EXRLTargets2Sym) {
|
||||||
|
// The label comes first so that it addresses the instruction emitted next.
OutStreamer->emitLabel(I.second);
|
||||||
|
const MCInstSTIPair &MCI_STI = I.first;
|
||||||
|
// Emit the target instruction using the subtarget stored alongside it.
OutStreamer->emitInstruction(MCI_STI.first, *MCI_STI.second);
|
||||||
|
}
|
||||||
|
// Forget the emitted targets so they are not emitted a second time.
EXRLTargets2Sym.clear();
|
||||||
|
}
|
||||||
|
|
||||||
// Convert a SystemZ-specific constant pool modifier into the associated
|
// Convert a SystemZ-specific constant pool modifier into the associated
|
||||||
// MCSymbolRefExpr variant kind.
|
// MCSymbolRefExpr variant kind.
|
||||||
static MCSymbolRefExpr::VariantKind
|
static MCSymbolRefExpr::VariantKind
|
||||||
@ -746,6 +783,7 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {
|
void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {
|
||||||
|
emitEXRLTargetInstructions();
|
||||||
emitStackMaps(SM);
|
emitStackMaps(SM);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,10 +9,11 @@
|
|||||||
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
|
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
|
||||||
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
|
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
|
||||||
|
|
||||||
#include "SystemZTargetMachine.h"
|
|
||||||
#include "SystemZMCInstLower.h"
|
#include "SystemZMCInstLower.h"
|
||||||
|
#include "SystemZTargetMachine.h"
|
||||||
#include "llvm/CodeGen/AsmPrinter.h"
|
#include "llvm/CodeGen/AsmPrinter.h"
|
||||||
#include "llvm/CodeGen/StackMaps.h"
|
#include "llvm/CodeGen/StackMaps.h"
|
||||||
|
#include "llvm/MC/MCInstBuilder.h"
|
||||||
#include "llvm/Support/Compiler.h"
|
#include "llvm/Support/Compiler.h"
|
||||||
|
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
@ -26,6 +27,33 @@ class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter {
|
|||||||
private:
|
private:
|
||||||
StackMaps SM;
|
StackMaps SM;
|
||||||
|
|
||||||
|
typedef std::pair<MCInst, const MCSubtargetInfo *> MCInstSTIPair;
|
||||||
|
// Ordering predicate for MCInstSTIPair keys (used as the std::map comparator
// for EXRL targets). Keys are ordered by subtarget pointer, then opcode, then
// operands 0, 1, 3 and 4 of the MCInst. Operand 2 is asserted to be 1 in both
// instructions and therefore does not take part in the comparison.
// NOTE(review): the first criterion is the numeric value of the subtarget
// pointer, so the map's iteration order depends on object addresses when
// more than one subtarget is involved - confirm that is acceptable.
struct CmpMCInst {
|
||||||
|
// Returns true if MCI_STI_A orders strictly before MCI_STI_B.
bool operator()(const MCInstSTIPair &MCI_STI_A,
|
||||||
|
const MCInstSTIPair &MCI_STI_B) const {
|
||||||
|
// Different subtargets: order by pointer value.
if (MCI_STI_A.second != MCI_STI_B.second)
|
||||||
|
return uintptr_t(MCI_STI_A.second) < uintptr_t(MCI_STI_B.second);
|
||||||
|
const MCInst &A = MCI_STI_A.first;
|
||||||
|
const MCInst &B = MCI_STI_B.first;
|
||||||
|
// Both instructions must have exactly five operands with immediate 1 as
// operand 2.
assert(A.getNumOperands() == B.getNumOperands() &&
|
||||||
|
A.getNumOperands() == 5 && A.getOperand(2).getImm() == 1 &&
|
||||||
|
B.getOperand(2).getImm() == 1 && "Unexpected EXRL target MCInst");
|
||||||
|
// Compare field by field; the first differing field decides the order.
if (A.getOpcode() != B.getOpcode())
|
||||||
|
return A.getOpcode() < B.getOpcode();
|
||||||
|
// Operand 0: register.
if (A.getOperand(0).getReg() != B.getOperand(0).getReg())
|
||||||
|
return A.getOperand(0).getReg() < B.getOperand(0).getReg();
|
||||||
|
// Operand 1: immediate.
if (A.getOperand(1).getImm() != B.getOperand(1).getImm())
|
||||||
|
return A.getOperand(1).getImm() < B.getOperand(1).getImm();
|
||||||
|
// Operand 3: register.
if (A.getOperand(3).getReg() != B.getOperand(3).getReg())
|
||||||
|
return A.getOperand(3).getReg() < B.getOperand(3).getReg();
|
||||||
|
// Operand 4: immediate.
if (A.getOperand(4).getImm() != B.getOperand(4).getImm())
|
||||||
|
return A.getOperand(4).getImm() < B.getOperand(4).getImm();
|
||||||
|
// All compared fields are equal, so neither key orders before the other.
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
typedef std::map<MCInstSTIPair, MCSymbol *, CmpMCInst> EXRLT2SymMap;
|
||||||
|
EXRLT2SymMap EXRLTargets2Sym;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
|
SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
|
||||||
: AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
|
: AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
|
||||||
@ -49,6 +77,7 @@ private:
|
|||||||
void LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &MCIL);
|
void LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &MCIL);
|
||||||
void LowerSTACKMAP(const MachineInstr &MI);
|
void LowerSTACKMAP(const MachineInstr &MI);
|
||||||
void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower);
|
void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower);
|
||||||
|
void emitEXRLTargetInstructions();
|
||||||
};
|
};
|
||||||
} // end namespace llvm
|
} // end namespace llvm
|
||||||
|
|
||||||
|
@ -7795,43 +7795,89 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
|
|||||||
uint64_t DestDisp = MI.getOperand(1).getImm();
|
uint64_t DestDisp = MI.getOperand(1).getImm();
|
||||||
MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
|
MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
|
||||||
uint64_t SrcDisp = MI.getOperand(3).getImm();
|
uint64_t SrcDisp = MI.getOperand(3).getImm();
|
||||||
uint64_t Length = MI.getOperand(4).getImm();
|
MachineOperand &LengthMO = MI.getOperand(4);
|
||||||
|
uint64_t ImmLength = LengthMO.isImm() ? LengthMO.getImm() : 0;
|
||||||
|
Register LenMinus1Reg =
|
||||||
|
LengthMO.isReg() ? LengthMO.getReg() : SystemZ::NoRegister;
|
||||||
|
|
||||||
// When generating more than one CLC, all but the last will need to
|
// When generating more than one CLC, all but the last will need to
|
||||||
// branch to the end when a difference is found.
|
// branch to the end when a difference is found.
|
||||||
MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
|
MachineBasicBlock *EndMBB = (ImmLength > 256 && Opcode == SystemZ::CLC
|
||||||
SystemZ::splitBlockAfter(MI, MBB) : nullptr);
|
? SystemZ::splitBlockAfter(MI, MBB)
|
||||||
|
: nullptr);
|
||||||
|
|
||||||
// Check for the loop form, in which operand 5 is the trip count.
|
// Check for the loop form, in which operand 5 is the trip count.
|
||||||
if (MI.getNumExplicitOperands() > 5) {
|
if (MI.getNumExplicitOperands() > 5) {
|
||||||
bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
|
|
||||||
|
|
||||||
Register StartCountReg = MI.getOperand(5).getReg();
|
Register StartCountReg = MI.getOperand(5).getReg();
|
||||||
Register StartSrcReg = forceReg(MI, SrcBase, TII);
|
|
||||||
Register StartDestReg = (HaveSingleBase ? StartSrcReg :
|
MachineBasicBlock *StartMBB = nullptr;
|
||||||
forceReg(MI, DestBase, TII));
|
MachineBasicBlock *LoopMBB = nullptr;
|
||||||
|
MachineBasicBlock *NextMBB = nullptr;
|
||||||
|
MachineBasicBlock *DoneMBB = nullptr;
|
||||||
|
MachineBasicBlock *AllDoneMBB = nullptr;
|
||||||
|
|
||||||
|
bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
|
||||||
|
Register StartSrcReg = forceReg(MI, SrcBase, TII);
|
||||||
|
Register StartDestReg =
|
||||||
|
(HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
|
||||||
|
|
||||||
const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
|
const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
|
||||||
Register ThisSrcReg = MRI.createVirtualRegister(RC);
|
Register ThisSrcReg = MRI.createVirtualRegister(RC);
|
||||||
Register ThisDestReg = (HaveSingleBase ? ThisSrcReg :
|
Register ThisDestReg =
|
||||||
MRI.createVirtualRegister(RC));
|
(HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
|
||||||
Register NextSrcReg = MRI.createVirtualRegister(RC);
|
Register NextSrcReg = MRI.createVirtualRegister(RC);
|
||||||
Register NextDestReg = (HaveSingleBase ? NextSrcReg :
|
Register NextDestReg =
|
||||||
MRI.createVirtualRegister(RC));
|
(HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
|
||||||
|
|
||||||
RC = &SystemZ::GR64BitRegClass;
|
RC = &SystemZ::GR64BitRegClass;
|
||||||
Register ThisCountReg = MRI.createVirtualRegister(RC);
|
Register ThisCountReg = MRI.createVirtualRegister(RC);
|
||||||
Register NextCountReg = MRI.createVirtualRegister(RC);
|
Register NextCountReg = MRI.createVirtualRegister(RC);
|
||||||
|
|
||||||
MachineBasicBlock *StartMBB = MBB;
|
if (LengthMO.isReg()) {
|
||||||
MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
|
AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
|
||||||
MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
|
StartMBB = SystemZ::emitBlockAfter(MBB);
|
||||||
MachineBasicBlock *NextMBB =
|
LoopMBB = SystemZ::emitBlockAfter(StartMBB);
|
||||||
(EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
|
NextMBB = LoopMBB;
|
||||||
|
DoneMBB = SystemZ::emitBlockAfter(LoopMBB);
|
||||||
|
|
||||||
// StartMBB:
|
// MBB:
|
||||||
// # fall through to LoopMMB
|
// # Jump to AllDoneMBB if LenMinus1Reg is -1, or fall thru to StartMBB.
|
||||||
MBB->addSuccessor(LoopMBB);
|
BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
|
||||||
|
.addReg(LenMinus1Reg).addImm(-1);
|
||||||
|
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
|
||||||
|
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
|
||||||
|
.addMBB(AllDoneMBB);
|
||||||
|
MBB->addSuccessor(AllDoneMBB);
|
||||||
|
MBB->addSuccessor(StartMBB);
|
||||||
|
|
||||||
|
// StartMBB:
|
||||||
|
// # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
|
||||||
|
MBB = StartMBB;
|
||||||
|
BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
|
||||||
|
.addReg(StartCountReg).addImm(0);
|
||||||
|
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
|
||||||
|
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
|
||||||
|
.addMBB(DoneMBB);
|
||||||
|
MBB->addSuccessor(DoneMBB);
|
||||||
|
MBB->addSuccessor(LoopMBB);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
StartMBB = MBB;
|
||||||
|
DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
|
||||||
|
LoopMBB = SystemZ::emitBlockAfter(StartMBB);
|
||||||
|
NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
|
||||||
|
|
||||||
|
// StartMBB:
|
||||||
|
// # fall through to LoopMBB
|
||||||
|
MBB->addSuccessor(LoopMBB);
|
||||||
|
|
||||||
|
DestBase = MachineOperand::CreateReg(NextDestReg, false);
|
||||||
|
SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
|
||||||
|
ImmLength &= 255;
|
||||||
|
if (EndMBB && !ImmLength)
|
||||||
|
// If the loop handled the whole CLC range, DoneMBB will be empty with
|
||||||
|
// CC live-through into EndMBB, so add it as live-in.
|
||||||
|
DoneMBB->addLiveIn(SystemZ::CC);
|
||||||
|
}
|
||||||
|
|
||||||
// LoopMBB:
|
// LoopMBB:
|
||||||
// %ThisDestReg = phi [ %StartDestReg, StartMBB ],
|
// %ThisDestReg = phi [ %StartDestReg, StartMBB ],
|
||||||
@ -7846,7 +7892,6 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
|
|||||||
//
|
//
|
||||||
// The prefetch is used only for MVC. The JLH is used only for CLC.
|
// The prefetch is used only for MVC. The JLH is used only for CLC.
|
||||||
MBB = LoopMBB;
|
MBB = LoopMBB;
|
||||||
|
|
||||||
BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
|
BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
|
||||||
.addReg(StartDestReg).addMBB(StartMBB)
|
.addReg(StartDestReg).addMBB(StartMBB)
|
||||||
.addReg(NextDestReg).addMBB(NextMBB);
|
.addReg(NextDestReg).addMBB(NextMBB);
|
||||||
@ -7882,7 +7927,6 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
|
|||||||
//
|
//
|
||||||
// The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
|
// The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
|
||||||
MBB = NextMBB;
|
MBB = NextMBB;
|
||||||
|
|
||||||
BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
|
BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
|
||||||
.addReg(ThisDestReg).addImm(256).addReg(0);
|
.addReg(ThisDestReg).addImm(256).addReg(0);
|
||||||
if (!HaveSingleBase)
|
if (!HaveSingleBase)
|
||||||
@ -7898,18 +7942,39 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
|
|||||||
MBB->addSuccessor(LoopMBB);
|
MBB->addSuccessor(LoopMBB);
|
||||||
MBB->addSuccessor(DoneMBB);
|
MBB->addSuccessor(DoneMBB);
|
||||||
|
|
||||||
DestBase = MachineOperand::CreateReg(NextDestReg, false);
|
|
||||||
SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
|
|
||||||
Length &= 255;
|
|
||||||
if (EndMBB && !Length)
|
|
||||||
// If the loop handled the whole CLC range, DoneMBB will be empty with
|
|
||||||
// CC live-through into EndMBB, so add it as live-in.
|
|
||||||
DoneMBB->addLiveIn(SystemZ::CC);
|
|
||||||
MBB = DoneMBB;
|
MBB = DoneMBB;
|
||||||
|
if (LengthMO.isReg()) {
|
||||||
|
// DoneMBB:
|
||||||
|
// # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
|
||||||
|
// # Use EXecute Relative Long for the remainder of the bytes. The target
|
||||||
|
// instruction of the EXRL will have a length field of 1 since 0 is an
|
||||||
|
// illegal value. The number of bytes processed becomes (%LenMinus1Reg &
|
||||||
|
// 0xff) + 1.
|
||||||
|
// # Fall through to AllDoneMBB.
|
||||||
|
Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
|
||||||
|
Register RemDestReg = HaveSingleBase ? RemSrcReg
|
||||||
|
: MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
|
||||||
|
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
|
||||||
|
.addReg(StartDestReg).addMBB(StartMBB)
|
||||||
|
.addReg(NextDestReg).addMBB(LoopMBB);
|
||||||
|
if (!HaveSingleBase)
|
||||||
|
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
|
||||||
|
.addReg(StartSrcReg).addMBB(StartMBB)
|
||||||
|
.addReg(NextSrcReg).addMBB(LoopMBB);
|
||||||
|
MRI.constrainRegClass(LenMinus1Reg, &SystemZ::ADDR64BitRegClass);
|
||||||
|
BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
|
||||||
|
.addImm(Opcode)
|
||||||
|
.addReg(LenMinus1Reg)
|
||||||
|
.addReg(RemDestReg).addImm(DestDisp)
|
||||||
|
.addReg(RemSrcReg).addImm(SrcDisp);
|
||||||
|
MBB->addSuccessor(AllDoneMBB);
|
||||||
|
MBB = AllDoneMBB;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle any remaining bytes with straight-line code.
|
// Handle any remaining bytes with straight-line code.
|
||||||
while (Length > 0) {
|
while (ImmLength > 0) {
|
||||||
uint64_t ThisLength = std::min(Length, uint64_t(256));
|
uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
|
||||||
// The previous iteration might have created out-of-range displacements.
|
// The previous iteration might have created out-of-range displacements.
|
||||||
// Apply them using LAY if so.
|
// Apply them using LAY if so.
|
||||||
if (!isUInt<12>(DestDisp)) {
|
if (!isUInt<12>(DestDisp)) {
|
||||||
@ -7939,10 +8004,10 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
|
|||||||
.setMemRefs(MI.memoperands());
|
.setMemRefs(MI.memoperands());
|
||||||
DestDisp += ThisLength;
|
DestDisp += ThisLength;
|
||||||
SrcDisp += ThisLength;
|
SrcDisp += ThisLength;
|
||||||
Length -= ThisLength;
|
ImmLength -= ThisLength;
|
||||||
// If there's another CLC to go, branch to the end if a difference
|
// If there's another CLC to go, branch to the end if a difference
|
||||||
// was found.
|
// was found.
|
||||||
if (EndMBB && Length > 0) {
|
if (EndMBB && ImmLength > 0) {
|
||||||
MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
|
MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
|
||||||
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
|
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
|
||||||
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
|
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
|
||||||
@ -8433,6 +8498,7 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
|
|||||||
return emitMemMemWrapper(MI, MBB, SystemZ::OC);
|
return emitMemMemWrapper(MI, MBB, SystemZ::OC);
|
||||||
case SystemZ::XCSequence:
|
case SystemZ::XCSequence:
|
||||||
case SystemZ::XCLoop:
|
case SystemZ::XCLoop:
|
||||||
|
case SystemZ::XCLoopVarLen:
|
||||||
return emitMemMemWrapper(MI, MBB, SystemZ::XC);
|
return emitMemMemWrapper(MI, MBB, SystemZ::XC);
|
||||||
case SystemZ::CLCSequence:
|
case SystemZ::CLCSequence:
|
||||||
case SystemZ::CLCLoop:
|
case SystemZ::CLCLoop:
|
||||||
|
@ -5253,6 +5253,7 @@ multiclass CondUnaryRSYPseudoAndMemFold<string mnemonic,
|
|||||||
// The Sequence form uses a straight-line sequence of instructions and
|
// The Sequence form uses a straight-line sequence of instructions and
|
||||||
// the Loop form uses a loop of length-256 instructions followed by
|
// the Loop form uses a loop of length-256 instructions followed by
|
||||||
// another instruction to handle the excess.
|
// another instruction to handle the excess.
|
||||||
|
// The LoopVarLen form is for a loop with a non-constant length parameter.
|
||||||
multiclass MemorySS<string mnemonic, bits<8> opcode,
|
multiclass MemorySS<string mnemonic, bits<8> opcode,
|
||||||
SDPatternOperator sequence, SDPatternOperator loop> {
|
SDPatternOperator sequence, SDPatternOperator loop> {
|
||||||
def "" : SideEffectBinarySSa<mnemonic, opcode>;
|
def "" : SideEffectBinarySSa<mnemonic, opcode>;
|
||||||
@ -5265,6 +5266,10 @@ multiclass MemorySS<string mnemonic, bits<8> opcode,
|
|||||||
imm64:$length, GR64:$count256),
|
imm64:$length, GR64:$count256),
|
||||||
[(loop bdaddr12only:$dest, bdaddr12only:$src,
|
[(loop bdaddr12only:$dest, bdaddr12only:$src,
|
||||||
imm64:$length, GR64:$count256)]>;
|
imm64:$length, GR64:$count256)]>;
|
||||||
|
def LoopVarLen : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
|
||||||
|
GR64:$length, GR64:$count256),
|
||||||
|
[(loop bdaddr12only:$dest, bdaddr12only:$src,
|
||||||
|
GR64:$length, GR64:$count256)]>;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2165,8 +2165,12 @@ let Predicates = [FeatureDeflateConversion],
|
|||||||
|
|
||||||
// Execute.
|
// Execute.
|
||||||
let hasSideEffects = 1 in {
|
let hasSideEffects = 1 in {
|
||||||
def EX : SideEffectBinaryRX<"ex", 0x44, GR64>;
|
def EX : SideEffectBinaryRX<"ex", 0x44, ADDR64>;
|
||||||
def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, GR64>;
|
def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, ADDR64>;
|
||||||
|
let hasNoSchedulingInfo = 1 in
|
||||||
|
def EXRL_Pseudo : Pseudo<(outs), (ins i64imm:$TargetOpc, ADDR64:$lenMinus1,
|
||||||
|
bdaddr12only:$bdl1, bdaddr12only:$bd2),
|
||||||
|
[]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -81,11 +81,12 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
|
|||||||
if (IsVolatile)
|
if (IsVolatile)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
|
auto *CByte = dyn_cast<ConstantSDNode>(Byte);
|
||||||
if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
|
if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
|
||||||
uint64_t Bytes = CSize->getZExtValue();
|
uint64_t Bytes = CSize->getZExtValue();
|
||||||
if (Bytes == 0)
|
if (Bytes == 0)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
if (auto *CByte = dyn_cast<ConstantSDNode>(Byte)) {
|
if (CByte) {
|
||||||
// Handle cases that can be done using at most two of
|
// Handle cases that can be done using at most two of
|
||||||
// MVI, MVHI, MVHHI and MVGHI. The latter two can only be
|
// MVI, MVHI, MVHHI and MVGHI. The latter two can only be
|
||||||
// used if ByteVal is all zeros or all ones; in other casees,
|
// used if ByteVal is all zeros or all ones; in other casees,
|
||||||
@ -125,7 +126,6 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
|
|||||||
assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already");
|
assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already");
|
||||||
|
|
||||||
// Handle the special case of a memset of 0, which can use XC.
|
// Handle the special case of a memset of 0, which can use XC.
|
||||||
auto *CByte = dyn_cast<ConstantSDNode>(Byte);
|
|
||||||
if (CByte && CByte->getZExtValue() == 0)
|
if (CByte && CByte->getZExtValue() == 0)
|
||||||
return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP,
|
return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP,
|
||||||
Chain, Dst, Dst, Bytes);
|
Chain, Dst, Dst, Bytes);
|
||||||
@ -138,6 +138,18 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
|
|||||||
return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
|
return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
|
||||||
Chain, DstPlus1, Dst, Bytes - 1);
|
Chain, DstPlus1, Dst, Bytes - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Variable length
|
||||||
|
if (CByte && CByte->getZExtValue() == 0) {
|
||||||
|
// Handle the special case of a variable length memset of 0 with XC.
|
||||||
|
SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
|
||||||
|
DAG.getZExtOrTrunc(Size, DL, MVT::i64),
|
||||||
|
DAG.getConstant(-1, DL, MVT::i64));
|
||||||
|
SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1,
|
||||||
|
DAG.getConstant(8, DL, MVT::i64));
|
||||||
|
return DAG.getNode(SystemZISD::XC_LOOP, DL, MVT::Other, Chain, Dst, Dst,
|
||||||
|
LenMinus1, TripC);
|
||||||
|
}
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
101
test/CodeGen/SystemZ/memset-05.ll
Normal file
101
test/CodeGen/SystemZ/memset-05.ll
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
; Test memset 0 with variable length
|
||||||
|
;
|
||||||
|
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
|
||||||
|
|
||||||
|
define void @fun0(i8* %Addr, i64 %Len) {
|
||||||
|
; CHECK-LABEL: fun0:
|
||||||
|
; CHECK: # %bb.0:
|
||||||
|
; CHECK-NEXT: aghi %r3, -1
|
||||||
|
; CHECK-NEXT: cgibe %r3, -1, 0(%r14)
|
||||||
|
; CHECK-NEXT: .LBB0_1:
|
||||||
|
; CHECK-NEXT: srlg %r0, %r3, 8
|
||||||
|
; CHECK-NEXT: cgije %r0, 0, .LBB0_3
|
||||||
|
; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
|
||||||
|
; CHECK-NEXT: xc 0(256,%r2), 0(%r2)
|
||||||
|
; CHECK-NEXT: la %r2, 256(%r2)
|
||||||
|
; CHECK-NEXT: brctg %r0, .LBB0_2
|
||||||
|
; CHECK-NEXT: .LBB0_3:
|
||||||
|
; CHECK-NEXT: exrl %r3, .Ltmp0
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
tail call void @llvm.memset.p0i8.i64(i8* %Addr, i8 0, i64 %Len, i1 false)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @fun1(i8* %Addr, i32 %Len) {
|
||||||
|
; CHECK-LABEL: fun1:
|
||||||
|
; CHECK: # %bb.0:
|
||||||
|
; CHECK-NEXT: llgfr %r1, %r3
|
||||||
|
; CHECK-NEXT: aghi %r1, -1
|
||||||
|
; CHECK-NEXT: cgibe %r1, -1, 0(%r14)
|
||||||
|
; CHECK-NEXT: .LBB1_1:
|
||||||
|
; CHECK-NEXT: srlg %r0, %r1, 8
|
||||||
|
; CHECK-NEXT: cgije %r0, 0, .LBB1_3
|
||||||
|
; CHECK-NEXT: .LBB1_2: # =>This Inner Loop Header: Depth=1
|
||||||
|
; CHECK-NEXT: xc 0(256,%r2), 0(%r2)
|
||||||
|
; CHECK-NEXT: la %r2, 256(%r2)
|
||||||
|
; CHECK-NEXT: brctg %r0, .LBB1_2
|
||||||
|
; CHECK-NEXT: .LBB1_3:
|
||||||
|
; CHECK-NEXT: exrl %r1, .Ltmp0
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Test that identical target instructions get reused.
|
||||||
|
define void @fun2(i8* %Addr, i32 %Len) {
|
||||||
|
; CHECK-LABEL: fun2:
|
||||||
|
; CHECK: # %bb.0:
|
||||||
|
; CHECK-NEXT: llgfr %r1, %r3
|
||||||
|
; CHECK-NEXT: aghi %r1, -1
|
||||||
|
; CHECK-NEXT: srlg %r0, %r1, 8
|
||||||
|
; CHECK-NEXT: cgije %r1, -1, .LBB2_5
|
||||||
|
; CHECK-NEXT: # %bb.1:
|
||||||
|
; CHECK-NEXT: lgr %r3, %r2
|
||||||
|
; CHECK-NEXT: cgije %r0, 0, .LBB2_4
|
||||||
|
; CHECK-NEXT: # %bb.2:
|
||||||
|
; CHECK-NEXT: lgr %r3, %r2
|
||||||
|
; CHECK-NEXT: lgr %r4, %r0
|
||||||
|
; CHECK-NEXT: .LBB2_3: # =>This Inner Loop Header: Depth=1
|
||||||
|
; CHECK-NEXT: xc 0(256,%r3), 0(%r3)
|
||||||
|
; CHECK-NEXT: la %r3, 256(%r3)
|
||||||
|
; CHECK-NEXT: brctg %r4, .LBB2_3
|
||||||
|
; CHECK-NEXT: .LBB2_4:
|
||||||
|
; CHECK-NEXT: exrl %r1, .Ltmp1
|
||||||
|
; CHECK-NEXT: .LBB2_5:
|
||||||
|
; CHECK-NEXT: cgije %r1, -1, .LBB2_10
|
||||||
|
; CHECK-NEXT: # %bb.6:
|
||||||
|
; CHECK-NEXT: lgr %r3, %r2
|
||||||
|
; CHECK-NEXT: cgije %r0, 0, .LBB2_9
|
||||||
|
; CHECK-NEXT: # %bb.7:
|
||||||
|
; CHECK-NEXT: lgr %r3, %r2
|
||||||
|
; CHECK-NEXT: lgr %r4, %r0
|
||||||
|
; CHECK-NEXT: .LBB2_8: # =>This Inner Loop Header: Depth=1
|
||||||
|
; CHECK-NEXT: xc 0(256,%r3), 0(%r3)
|
||||||
|
; CHECK-NEXT: la %r3, 256(%r3)
|
||||||
|
; CHECK-NEXT: brctg %r4, .LBB2_8
|
||||||
|
; CHECK-NEXT: .LBB2_9:
|
||||||
|
; CHECK-NEXT: exrl %r1, .Ltmp1
|
||||||
|
; CHECK-NEXT: .LBB2_10:
|
||||||
|
; CHECK-NEXT: cgibe %r1, -1, 0(%r14)
|
||||||
|
; CHECK-NEXT: .LBB2_11:
|
||||||
|
; CHECK-NEXT: cgije %r0, 0, .LBB2_13
|
||||||
|
; CHECK-NEXT: .LBB2_12: # =>This Inner Loop Header: Depth=1
|
||||||
|
; CHECK-NEXT: xc 0(256,%r2), 0(%r2)
|
||||||
|
; CHECK-NEXT: la %r2, 256(%r2)
|
||||||
|
; CHECK-NEXT: brctg %r0, .LBB2_12
|
||||||
|
; CHECK-NEXT: .LBB2_13:
|
||||||
|
; CHECK-NEXT: exrl %r1, .Ltmp0
|
||||||
|
; CHECK-NEXT: br %r14
|
||||||
|
tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
|
||||||
|
tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
|
||||||
|
tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: .Ltmp0:
|
||||||
|
; CHECK-NEXT: xc 0(1,%r2), 0(%r2)
|
||||||
|
; CHECK-NEXT: .Ltmp1:
|
||||||
|
; CHECK-NEXT: xc 0(1,%r3), 0(%r3)
|
||||||
|
|
||||||
|
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
|
||||||
|
declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)
|
Loading…
Reference in New Issue
Block a user