
[Hexagon] Cleanup and standardize vector load/store pseudo instructions

Remove the following single-vector load/store pseudo instructions, use real
instructions instead:
  LDriv_pseudo_V6         STriv_pseudo_V6
  LDriv_pseudo_V6_128B    STriv_pseudo_V6_128B
  LDrivv_indexed          STrivv_indexed
  LDrivv_indexed_128B     STrivv_indexed_128B

Rename the double-vector load/store pseudo instructions, add unaligned
counterparts:

  -- old --               -- new --            -- unaligned --
  LDrivv_pseudo_V6        PS_vloadrw_ai        PS_vloadrwu_ai
  LDrivv_pseudo_V6_128B   PS_vloadrw_ai_128B   PS_vloadrwu_ai_128B
  STrivv_pseudo_V6        PS_vstorerw_ai       PS_vstorerwu_ai
  STrivv_pseudo_V6_128B   PS_vstorerw_ai_128B  PS_vstorerwu_ai_128B

llvm-svn: 278564
Krzysztof Parzyszek 2016-08-12 21:05:05 +00:00
parent c140ece1e5
commit 1af9276b8e
3 changed files with 187 additions and 243 deletions
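
In short: spill and restore code now keys on the stack slot's alignment. Single vectors use the real V6 memory instructions directly; only vector pairs, which must be split after register allocation, keep pseudos, now in aligned and unaligned flavors. A condensed sketch of the store-side selection (hypothetical helper, not code from this patch; loads are symmetric):

    // Illustrative only: 64-byte HVX mode shown; the _128B variants use a
    // 128-byte threshold instead.
    unsigned pickStoreOpcode(const TargetRegisterClass *RC, unsigned Align) {
      if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC))   // single vector
        return Align < 64 ? Hexagon::V6_vS32Ub_ai          // real, unaligned
                          : Hexagon::V6_vS32b_ai;          // real, aligned
      if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC))   // vector pair
        return Align < 64 ? Hexagon::PS_vstorerwu_ai       // pseudo, unaligned
                          : Hexagon::PS_vstorerw_ai;       // pseudo, aligned
      llvm_unreachable("Unexpected register class");
    }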

lib/Target/Hexagon/HexagonFrameLowering.cpp

@@ -1470,12 +1470,13 @@ bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
       MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
       const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
   MachineInstr *MI = &*It;
+  if (!MI->getOperand(0).isFI())
+    return false;
   DebugLoc DL = MI->getDebugLoc();
   unsigned Opc = MI->getOpcode();
   unsigned SrcR = MI->getOperand(2).getReg();
   bool IsKill = MI->getOperand(2).isKill();
-  assert(MI->getOperand(0).isFI() && "Expect a frame index");
   int FI = MI->getOperand(0).getIndex();
   // TmpR = C2_tfrpr SrcR   if SrcR is a predicate register
@@ -1502,11 +1503,12 @@ bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
       MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
       const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
   MachineInstr *MI = &*It;
+  if (!MI->getOperand(1).isFI())
+    return false;
   DebugLoc DL = MI->getDebugLoc();
   unsigned Opc = MI->getOpcode();
   unsigned DstR = MI->getOperand(0).getReg();
-  assert(MI->getOperand(1).isFI() && "Expect a frame index");
   int FI = MI->getOperand(1).getIndex();
   // TmpR = L2_loadri_io FI, 0
@@ -1534,11 +1536,12 @@ bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
       const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
   auto &HST = B.getParent()->getSubtarget<HexagonSubtarget>();
   MachineInstr *MI = &*It;
+  if (!MI->getOperand(0).isFI())
+    return false;
   DebugLoc DL = MI->getDebugLoc();
   unsigned SrcR = MI->getOperand(2).getReg();
   bool IsKill = MI->getOperand(2).isKill();
-  assert(MI->getOperand(0).isFI() && "Expect a frame index");
   int FI = MI->getOperand(0).getIndex();
   bool Is128B = HST.useHVXDblOps();
@@ -1575,10 +1578,11 @@ bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,
       const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
   auto &HST = B.getParent()->getSubtarget<HexagonSubtarget>();
   MachineInstr *MI = &*It;
+  if (!MI->getOperand(1).isFI())
+    return false;
   DebugLoc DL = MI->getDebugLoc();
   unsigned DstR = MI->getOperand(0).getReg();
-  assert(MI->getOperand(1).isFI() && "Expect a frame index");
   int FI = MI->getOperand(1).getIndex();
   bool Is128B = HST.useHVXDblOps();
@@ -1616,14 +1620,14 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
   auto &MFI = MF.getFrameInfo();
   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
   MachineInstr *MI = &*It;
-  DebugLoc DL = MI->getDebugLoc();
+  if (!MI->getOperand(0).isFI())
+    return false;
+  DebugLoc DL = MI->getDebugLoc();
   unsigned SrcR = MI->getOperand(2).getReg();
   unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::subreg_loreg);
   unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::subreg_hireg);
   bool IsKill = MI->getOperand(2).isKill();
-  assert(MI->getOperand(0).isFI() && "Expect a frame index");
   int FI = MI->getOperand(0).getIndex();
   bool Is128B = HST.useHVXDblOps();
@@ -1670,13 +1674,13 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
   auto &MFI = MF.getFrameInfo();
   auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
   MachineInstr *MI = &*It;
-  DebugLoc DL = MI->getDebugLoc();
+  if (!MI->getOperand(1).isFI())
+    return false;
+  DebugLoc DL = MI->getDebugLoc();
   unsigned DstR = MI->getOperand(0).getReg();
   unsigned DstHi = HRI.getSubReg(DstR, Hexagon::subreg_hireg);
   unsigned DstLo = HRI.getSubReg(DstR, Hexagon::subreg_loreg);
-  assert(MI->getOperand(1).isFI() && "Expect a frame index");
   int FI = MI->getOperand(1).getIndex();
   bool Is128B = HST.useHVXDblOps();
@@ -1720,12 +1724,12 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
   auto &HST = MF.getSubtarget<HexagonSubtarget>();
   auto &MFI = MF.getFrameInfo();
   MachineInstr *MI = &*It;
-  DebugLoc DL = MI->getDebugLoc();
+  if (!MI->getOperand(0).isFI())
+    return false;
+  DebugLoc DL = MI->getDebugLoc();
   unsigned SrcR = MI->getOperand(2).getReg();
   bool IsKill = MI->getOperand(2).isKill();
-  assert(MI->getOperand(0).isFI() && "Expect a frame index");
   int FI = MI->getOperand(0).getIndex();
   bool Is128B = HST.useHVXDblOps();
@@ -1758,11 +1762,11 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
   auto &HST = MF.getSubtarget<HexagonSubtarget>();
   auto &MFI = MF.getFrameInfo();
   MachineInstr *MI = &*It;
+  if (!MI->getOperand(1).isFI())
+    return false;
   DebugLoc DL = MI->getDebugLoc();
   unsigned DstR = MI->getOperand(0).getReg();
-  assert(MI->getOperand(1).isFI() && "Expect a frame index");
   int FI = MI->getOperand(1).getIndex();
   bool Is128B = HST.useHVXDblOps();
@@ -1823,22 +1827,18 @@ bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
       case Hexagon::LDriq_pred_V6_128B:
         Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs);
         break;
-      case Hexagon::LDrivv_pseudo_V6:
-      case Hexagon::LDrivv_pseudo_V6_128B:
+      case Hexagon::PS_vloadrw_ai:
+      case Hexagon::PS_vloadrwu_ai:
+      case Hexagon::PS_vloadrw_ai_128B:
+      case Hexagon::PS_vloadrwu_ai_128B:
         Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs);
         break;
-      case Hexagon::STrivv_pseudo_V6:
-      case Hexagon::STrivv_pseudo_V6_128B:
+      case Hexagon::PS_vstorerw_ai:
+      case Hexagon::PS_vstorerwu_ai:
+      case Hexagon::PS_vstorerw_ai_128B:
+      case Hexagon::PS_vstorerwu_ai_128B:
         Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs);
         break;
-      case Hexagon::STriv_pseudo_V6:
-      case Hexagon::STriv_pseudo_V6_128B:
-        Changed |= expandStoreVec(B, I, MRI, HII, NewRegs);
-        break;
-      case Hexagon::LDriv_pseudo_V6:
-      case Hexagon::LDriv_pseudo_V6_128B:
-        Changed |= expandLoadVec(B, I, MRI, HII, NewRegs);
-        break;
     }
   }
 }
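
A pattern worth noting across these hunks: each expand* helper now opens with an early return instead of an assert, declining instructions whose address operand is not a frame index rather than crashing on them. A minimal sketch of the contract (hypothetical helper name, assuming the surrounding LLVM types):

    // Returns true iff MI was expanded. Stores carry the frame index in
    // operand 0; loads carry it in operand 1.
    bool expandStoreLike(MachineInstr &MI) {
      if (!MI.getOperand(0).isFI())  // not a stack-slot access
        return false;                // leave MI unchanged
      // ... emit the real instructions, then erase MI ...
      return true;
    }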

lib/Target/Hexagon/HexagonInstrInfo.cpp

@@ -253,12 +253,10 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
     case Hexagon::LDriw_mod:
     case Hexagon::LDriq_pred_V6:
     case Hexagon::LDriq_pred_vec_V6:
-    case Hexagon::LDriv_pseudo_V6:
-    case Hexagon::LDrivv_pseudo_V6:
+    case Hexagon::PS_vloadrw_ai:
     case Hexagon::LDriq_pred_V6_128B:
     case Hexagon::LDriq_pred_vec_V6_128B:
-    case Hexagon::LDriv_pseudo_V6_128B:
-    case Hexagon::LDrivv_pseudo_V6_128B: {
+    case Hexagon::PS_vloadrw_ai_128B: {
       const MachineOperand OpFI = MI.getOperand(1);
       if (!OpFI.isFI())
         return 0;
@@ -318,12 +316,10 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
     case Hexagon::STriw_mod:
     case Hexagon::STriq_pred_V6:
     case Hexagon::STriq_pred_vec_V6:
-    case Hexagon::STriv_pseudo_V6:
-    case Hexagon::STrivv_pseudo_V6:
+    case Hexagon::PS_vstorerw_ai:
     case Hexagon::STriq_pred_V6_128B:
     case Hexagon::STriq_pred_vec_V6_128B:
-    case Hexagon::STriv_pseudo_V6_128B:
-    case Hexagon::STrivv_pseudo_V6_128B: {
+    case Hexagon::PS_vstorerw_ai_128B: {
       const MachineOperand &OpFI = MI.getOperand(0);
       if (!OpFI.isFI())
         return 0;
@@ -904,23 +900,27 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       .addFrameIndex(FI).addImm(0)
       .addReg(SrcReg, KillFlag).addMemOperand(MMO);
   } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) {
-    DEBUG(dbgs() << "++Generating 128B vector spill");
-    BuildMI(MBB, I, DL, get(Hexagon::STriv_pseudo_V6_128B))
+    unsigned Opc = Align < 128 ? Hexagon::V6_vS32Ub_ai_128B
+                               : Hexagon::V6_vS32b_ai_128B;
+    BuildMI(MBB, I, DL, get(Opc))
       .addFrameIndex(FI).addImm(0)
       .addReg(SrcReg, KillFlag).addMemOperand(MMO);
   } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) {
-    DEBUG(dbgs() << "++Generating vector spill");
-    BuildMI(MBB, I, DL, get(Hexagon::STriv_pseudo_V6))
+    unsigned Opc = Align < 64 ? Hexagon::V6_vS32Ub_ai
+                              : Hexagon::V6_vS32b_ai;
+    BuildMI(MBB, I, DL, get(Opc))
       .addFrameIndex(FI).addImm(0)
       .addReg(SrcReg, KillFlag).addMemOperand(MMO);
   } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) {
-    DEBUG(dbgs() << "++Generating double vector spill");
-    BuildMI(MBB, I, DL, get(Hexagon::STrivv_pseudo_V6))
+    unsigned Opc = Align < 64 ? Hexagon::PS_vstorerwu_ai
+                              : Hexagon::PS_vstorerw_ai;
+    BuildMI(MBB, I, DL, get(Opc))
      .addFrameIndex(FI).addImm(0)
      .addReg(SrcReg, KillFlag).addMemOperand(MMO);
   } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) {
-    DEBUG(dbgs() << "++Generating 128B double vector spill");
-    BuildMI(MBB, I, DL, get(Hexagon::STrivv_pseudo_V6_128B))
+    unsigned Opc = Align < 128 ? Hexagon::PS_vstorerwu_ai_128B
+                               : Hexagon::PS_vstorerw_ai_128B;
+    BuildMI(MBB, I, DL, get(Opc))
       .addFrameIndex(FI).addImm(0)
       .addReg(SrcReg, KillFlag).addMemOperand(MMO);
   } else {
@@ -960,20 +960,24 @@ void HexagonInstrInfo::loadRegFromStackSlot(
     BuildMI(MBB, I, DL, get(Hexagon::LDriq_pred_V6), DestReg)
       .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
   } else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) {
-    DEBUG(dbgs() << "++Generating 128B double vector restore");
-    BuildMI(MBB, I, DL, get(Hexagon::LDrivv_pseudo_V6_128B), DestReg)
+    unsigned Opc = Align < 128 ? Hexagon::PS_vloadrwu_ai_128B
+                               : Hexagon::PS_vloadrw_ai_128B;
+    BuildMI(MBB, I, DL, get(Opc), DestReg)
       .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
   } else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) {
-    DEBUG(dbgs() << "++Generating 128B vector restore");
-    BuildMI(MBB, I, DL, get(Hexagon::LDriv_pseudo_V6_128B), DestReg)
+    unsigned Opc = Align < 128 ? Hexagon::V6_vL32Ub_ai_128B
+                               : Hexagon::V6_vL32b_ai_128B;
+    BuildMI(MBB, I, DL, get(Opc), DestReg)
       .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
   } else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) {
-    DEBUG(dbgs() << "++Generating vector restore");
-    BuildMI(MBB, I, DL, get(Hexagon::LDriv_pseudo_V6), DestReg)
+    unsigned Opc = Align < 64 ? Hexagon::V6_vL32Ub_ai
+                              : Hexagon::V6_vL32b_ai;
+    BuildMI(MBB, I, DL, get(Opc), DestReg)
       .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
   } else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) {
-    DEBUG(dbgs() << "++Generating double vector restore");
-    BuildMI(MBB, I, DL, get(Hexagon::LDrivv_pseudo_V6), DestReg)
+    unsigned Opc = Align < 64 ? Hexagon::PS_vloadrwu_ai
+                              : Hexagon::PS_vloadrw_ai;
+    BuildMI(MBB, I, DL, get(Opc), DestReg)
       .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
   } else {
     llvm_unreachable("Can't store this register to stack slot");
@@ -994,7 +998,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   DebugLoc DL = MI.getDebugLoc();
   unsigned Opc = MI.getOpcode();
   const unsigned VecOffset = 1;
-  bool Is128B = false;
   switch (Opc) {
     case TargetOpcode::COPY: {
@@ -1043,23 +1046,34 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
       MRI.clearKillFlags(SrcSubHi);
       return true;
    }
-    case Hexagon::STrivv_indexed_128B:
-      Is128B = true;
-    case Hexagon::STrivv_indexed: {
+    case Hexagon::PS_vstorerw_ai:
+    case Hexagon::PS_vstorerwu_ai:
+    case Hexagon::PS_vstorerw_ai_128B:
+    case Hexagon::PS_vstorerwu_ai_128B: {
+      bool Is128B = (Opc == Hexagon::PS_vstorerw_ai_128B ||
+                     Opc == Hexagon::PS_vstorerwu_ai_128B);
+      bool Aligned = (Opc == Hexagon::PS_vstorerw_ai ||
+                      Opc == Hexagon::PS_vstorerw_ai_128B);
       unsigned SrcReg = MI.getOperand(2).getReg();
       unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
       unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
-      unsigned NewOpcd = Is128B ? Hexagon::V6_vS32b_ai_128B
-                                : Hexagon::V6_vS32b_ai;
+      unsigned NewOpc;
+      if (Aligned)
+        NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B
+                        : Hexagon::V6_vS32b_ai;
+      else
+        NewOpc = Is128B ? Hexagon::V6_vS32Ub_ai_128B
+                        : Hexagon::V6_vS32Ub_ai;
       unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
       MachineInstr *MI1New =
-          BuildMI(MBB, MI, DL, get(NewOpcd))
+          BuildMI(MBB, MI, DL, get(NewOpc))
              .addOperand(MI.getOperand(0))
              .addImm(MI.getOperand(1).getImm())
              .addReg(SrcSubLo)
              .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
      MI1New->getOperand(0).setIsKill(false);
-      BuildMI(MBB, MI, DL, get(NewOpcd))
+      BuildMI(MBB, MI, DL, get(NewOpc))
          .addOperand(MI.getOperand(0))
          // The Vectors are indexed in multiples of vector size.
          .addImm(MI.getOperand(1).getImm() + Offset)
@@ -1068,22 +1082,31 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
      MBB.erase(MI);
      return true;
    }
-    case Hexagon::LDrivv_pseudo_V6_128B:
-    case Hexagon::LDrivv_indexed_128B:
-      Is128B = true;
-    case Hexagon::LDrivv_pseudo_V6:
-    case Hexagon::LDrivv_indexed: {
-      unsigned NewOpcd = Is128B ? Hexagon::V6_vL32b_ai_128B
-                                : Hexagon::V6_vL32b_ai;
+    case Hexagon::PS_vloadrw_ai:
+    case Hexagon::PS_vloadrwu_ai:
+    case Hexagon::PS_vloadrw_ai_128B:
+    case Hexagon::PS_vloadrwu_ai_128B: {
+      bool Is128B = (Opc == Hexagon::PS_vloadrw_ai_128B ||
+                     Opc == Hexagon::PS_vloadrwu_ai_128B);
+      bool Aligned = (Opc == Hexagon::PS_vloadrw_ai ||
+                      Opc == Hexagon::PS_vloadrw_ai_128B);
+      unsigned NewOpc;
+      if (Aligned)
+        NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B
+                        : Hexagon::V6_vL32b_ai;
+      else
+        NewOpc = Is128B ? Hexagon::V6_vL32Ub_ai_128B
+                        : Hexagon::V6_vL32Ub_ai;
       unsigned DstReg = MI.getOperand(0).getReg();
       unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
       MachineInstr *MI1New =
-          BuildMI(MBB, MI, DL, get(NewOpcd),
+          BuildMI(MBB, MI, DL, get(NewOpc),
                  HRI.getSubReg(DstReg, Hexagon::subreg_loreg))
              .addOperand(MI.getOperand(1))
              .addImm(MI.getOperand(2).getImm());
      MI1New->getOperand(1).setIsKill(false);
-      BuildMI(MBB, MI, DL, get(NewOpcd),
+      BuildMI(MBB, MI, DL, get(NewOpc),
              HRI.getSubReg(DstReg, Hexagon::subreg_hireg))
          .addOperand(MI.getOperand(1))
          // The Vectors are indexed in multiples of vector size.
@@ -1092,34 +1115,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
      MBB.erase(MI);
      return true;
    }
-    case Hexagon::LDriv_pseudo_V6_128B:
-      Is128B = true;
-    case Hexagon::LDriv_pseudo_V6: {
-      unsigned DstReg = MI.getOperand(0).getReg();
-      unsigned NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B
-                               : Hexagon::V6_vL32b_ai;
-      int32_t Off = MI.getOperand(2).getImm();
-      BuildMI(MBB, MI, DL, get(NewOpc), DstReg)
-          .addOperand(MI.getOperand(1))
-          .addImm(Off)
-          .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
-      MBB.erase(MI);
-      return true;
-    }
-    case Hexagon::STriv_pseudo_V6_128B:
-      Is128B = true;
-    case Hexagon::STriv_pseudo_V6: {
-      unsigned NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B
-                               : Hexagon::V6_vS32b_ai;
-      int32_t Off = MI.getOperand(1).getImm();
-      BuildMI(MBB, MI, DL, get(NewOpc))
-          .addOperand(MI.getOperand(0))
-          .addImm(Off)
-          .addOperand(MI.getOperand(2))
-          .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
-      MBB.erase(MI);
-      return true;
-    }
    case Hexagon::TFR_PdTrue: {
      unsigned Reg = MI.getOperand(0).getReg();
      BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
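
For orientation: a double-vector pseudo expands into two single-vector memory ops one vector apart, which is what the VecOffset shifts compute (1 << 6 = 64 bytes, or 1 << 7 = 128 in 128B mode). An illustrative example, not output from the patch, in 64-byte mode:

    // PS_vstorerw_ai <base>, 0, W0   (W0 is the V1:V0 pair) becomes:
    //   V6_vS32b_ai <base>, 0,  V0   // low half
    //   V6_vS32b_ai <base>, 64, V1   // high half
    // PS_vstorerwu_ai expands the same way, using V6_vS32Ub_ai instead.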
@@ -2667,14 +2662,10 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
  switch (Opcode) {
  case Hexagon::STriq_pred_V6:
  case Hexagon::STriq_pred_vec_V6:
-  case Hexagon::STriv_pseudo_V6:
-  case Hexagon::STrivv_pseudo_V6:
+  case Hexagon::PS_vstorerw_ai:
  case Hexagon::LDriq_pred_V6:
  case Hexagon::LDriq_pred_vec_V6:
-  case Hexagon::LDriv_pseudo_V6:
-  case Hexagon::LDrivv_pseudo_V6:
-  case Hexagon::LDrivv_indexed:
-  case Hexagon::STrivv_indexed:
+  case Hexagon::PS_vloadrw_ai:
  case Hexagon::V6_vL32b_ai:
  case Hexagon::V6_vS32b_ai:
  case Hexagon::V6_vL32Ub_ai:
@@ -2684,14 +2675,10 @@
  case Hexagon::STriq_pred_V6_128B:
  case Hexagon::STriq_pred_vec_V6_128B:
-  case Hexagon::STriv_pseudo_V6_128B:
-  case Hexagon::STrivv_pseudo_V6_128B:
+  case Hexagon::PS_vstorerw_ai_128B:
  case Hexagon::LDriq_pred_V6_128B:
  case Hexagon::LDriq_pred_vec_V6_128B:
-  case Hexagon::LDriv_pseudo_V6_128B:
-  case Hexagon::LDrivv_pseudo_V6_128B:
-  case Hexagon::LDrivv_indexed_128B:
-  case Hexagon::STrivv_indexed_128B:
+  case Hexagon::PS_vloadrw_ai_128B:
  case Hexagon::V6_vL32b_ai_128B:
  case Hexagon::V6_vS32b_ai_128B:
  case Hexagon::V6_vL32Ub_ai_128B:
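
The renamed pseudos must also pass the same offset check as the real vector memops: the immediate has to be a vector-aligned value fitting a signed 4-bit field scaled by the vector size. That is my reading of the s4_6Imm/s4_7Imm operand names used in the .td file; a sketch of the assumed constraint, not code from the patch:

    #include "llvm/Support/MathExtras.h"

    // Assumed range check: a signed 4-bit immediate with 6 (64B mode) or
    // 7 (128B mode) implicit low zero bits, i.e. -8*VecSize .. 7*VecSize.
    bool isValidHVXOffset(int Offset, bool Is128B) {
      return Is128B ? llvm::isShiftedInt<4, 7>(Offset)
                    : llvm::isShiftedInt<4, 6>(Offset);
    }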

lib/Target/Hexagon/HexagonInstrInfoV60.td

@@ -27,16 +27,6 @@ def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [
 }]>;
-// Vector store
-let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
-{
-  class VSTInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
-                string cstr = "", InstrItinClass itin = CVI_VM_ST,
-                IType type = TypeCVI_VM_ST>
-  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>, OpcodeHexagon;
-}
 // Vector load
 let Predicates = [HasV60T, UseHVX] in
 let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
@@ -45,6 +35,7 @@ let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
                 IType type = TypeCVI_VM_LD>
   : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
 // Vector store
+let Predicates = [HasV60T, UseHVX] in
 let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
   class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
@@ -776,51 +767,24 @@ def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>,
                                 V6_vS32b_nt_new_npred_ppu_enc;
 }
-let isPseudo = 1, validSubTargets = HasV60SubT in
-class STrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>:
-  VSTInst<(outs), (ins IntRegs:$addr, ImmOp:$off, RC:$src),
-          #mnemonic#"($addr+#$off) = $src", []>;
-def STrivv_indexed: STrivv_template<"vvmem", s4_6Imm, VecDblRegs>,
-      Requires<[HasV60T, UseHVXSgl]>;
-def STrivv_indexed_128B: STrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>,
-      Requires<[HasV60T, UseHVXDbl]>;
-multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
-  def : Pat<(store (VTSgl VecDblRegs:$src1), IntRegs:$addr),
-            (STrivv_indexed IntRegs:$addr, #0, (VTSgl VecDblRegs:$src1))>,
-        Requires<[UseHVXSgl]>;
-  def : Pat<(store (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
-            (STrivv_indexed_128B IntRegs:$addr, #0,
-                                 (VTDbl VecDblRegs128B:$src1))>,
-        Requires<[UseHVXDbl]>;
-}
-defm : STrivv_pats <v128i8, v256i8>;
-defm : STrivv_pats <v64i16, v128i16>;
-defm : STrivv_pats <v32i32, v64i32>;
-defm : STrivv_pats <v16i64, v32i64>;
 multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
   // Aligned stores
   def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
-            (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>,
+            (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
         Requires<[UseHVXSgl]>;
   def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
-            (V6_vS32Ub_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>,
+            (V6_vS32Ub_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
         Requires<[UseHVXSgl]>;
   // 128B Aligned stores
   def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
-            (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>,
+            (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
         Requires<[UseHVXDbl]>;
   def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
-            (V6_vS32Ub_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>,
+            (V6_vS32Ub_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
         Requires<[UseHVXDbl]>;
-  // Fold Add R+IFF into vector store.
+  // Fold Add R+OFF into vector store.
   let AddedComplexity = 10 in {
     def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
                             (add IntRegs:$src2, s4_6ImmPred:$offset)),
@@ -833,7 +797,7 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
                           (VTSgl VectorRegs:$src1))>,
           Requires<[UseHVXSgl]>;
-    // Fold Add R+IFF into vector store 128B.
+    // Fold Add R+OFF into vector store 128B.
     def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
                             (add IntRegs:$src2, s4_7ImmPred:$offset)),
               (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
@@ -852,49 +816,25 @@ defm : vS32b_ai_pats <v32i16, v64i16>;
 defm : vS32b_ai_pats <v16i32, v32i32>;
 defm : vS32b_ai_pats <v8i64, v16i64>;
-let isPseudo = 1, validSubTargets = HasV60SubT in
-class LDrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>
-  : V6_LDInst <(outs RC:$dst), (ins IntRegs:$addr, ImmOp:$off),
-               "$dst="#mnemonic#"($addr+#$off)",
-               []>,
-    Requires<[HasV60T,UseHVXSgl]>;
-def LDrivv_indexed: LDrivv_template<"vvmem", s4_6Imm, VecDblRegs>;
-def LDrivv_indexed_128B: LDrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>;
-multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
-  def : Pat < (VTSgl (load IntRegs:$addr)),
-              (LDrivv_indexed IntRegs:$addr, #0) >,
-        Requires<[UseHVXSgl]>;
-  def : Pat < (VTDbl (load IntRegs:$addr)),
-              (LDrivv_indexed_128B IntRegs:$addr, #0) >,
-        Requires<[UseHVXDbl]>;
-}
-defm : LDrivv_pats <v128i8, v256i8>;
-defm : LDrivv_pats <v64i16, v128i16>;
-defm : LDrivv_pats <v32i32, v64i32>;
-defm : LDrivv_pats <v16i64, v32i64>;
 multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
   // Aligned loads
   def : Pat < (VTSgl (alignedload IntRegs:$addr)),
-              (V6_vL32b_ai IntRegs:$addr, #0) >,
+              (V6_vL32b_ai IntRegs:$addr, 0) >,
         Requires<[UseHVXSgl]>;
   def : Pat < (VTSgl (unalignedload IntRegs:$addr)),
-              (V6_vL32Ub_ai IntRegs:$addr, #0) >,
+              (V6_vL32Ub_ai IntRegs:$addr, 0) >,
         Requires<[UseHVXSgl]>;
   // 128B Load
   def : Pat < (VTDbl (alignedload IntRegs:$addr)),
-              (V6_vL32b_ai_128B IntRegs:$addr, #0) >,
+              (V6_vL32b_ai_128B IntRegs:$addr, 0) >,
         Requires<[UseHVXDbl]>;
   def : Pat < (VTDbl (unalignedload IntRegs:$addr)),
-              (V6_vL32Ub_ai_128B IntRegs:$addr, #0) >,
+              (V6_vL32Ub_ai_128B IntRegs:$addr, 0) >,
         Requires<[UseHVXDbl]>;
-  // Fold Add R+IFF into vector load.
+  // Fold Add R+OFF into vector load.
   let AddedComplexity = 10 in {
     def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
               (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
@@ -917,6 +857,80 @@ defm : vL32b_ai_pats <v32i16, v64i16>;
 defm : vL32b_ai_pats <v16i32, v32i32>;
 defm : vL32b_ai_pats <v8i64, v16i64>;
+// Vector load/store pseudos
+let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
+class STrivv_template<RegisterClass RC>
+  : V6_STInst<(outs), (ins IntRegs:$addr, s32Imm:$off, RC:$src), "", []>;
+def PS_vstorerw_ai: STrivv_template<VecDblRegs>,
+      Requires<[HasV60T,UseHVXSgl]>;
+def PS_vstorerwu_ai: STrivv_template<VecDblRegs>,
+      Requires<[HasV60T,UseHVXSgl]>;
+def PS_vstorerw_ai_128B: STrivv_template<VecDblRegs128B>,
+      Requires<[HasV60T,UseHVXDbl]>;
+def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B>,
+      Requires<[HasV60T,UseHVXDbl]>;
+multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+  def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+            (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
+        Requires<[UseHVXSgl]>;
+  def : Pat<(unalignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+            (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
+        Requires<[UseHVXSgl]>;
+  def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+            (PS_vstorerw_ai_128B IntRegs:$addr, 0,
+                                 (VTDbl VecDblRegs128B:$src1))>,
+        Requires<[UseHVXDbl]>;
+  def : Pat<(unalignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+            (PS_vstorerwu_ai_128B IntRegs:$addr, 0,
+                                  (VTDbl VecDblRegs128B:$src1))>,
+        Requires<[UseHVXDbl]>;
+}
+defm : STrivv_pats <v128i8, v256i8>;
+defm : STrivv_pats <v64i16, v128i16>;
+defm : STrivv_pats <v32i32, v64i32>;
+defm : STrivv_pats <v16i64, v32i64>;
+let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
+class LDrivv_template<RegisterClass RC>
+  : V6_LDInst<(outs RC:$dst), (ins IntRegs:$addr, s32Imm:$off), "", []>;
+def PS_vloadrw_ai: LDrivv_template<VecDblRegs>,
+      Requires<[HasV60T,UseHVXSgl]>;
+def PS_vloadrwu_ai: LDrivv_template<VecDblRegs>,
+      Requires<[HasV60T,UseHVXSgl]>;
+def PS_vloadrw_ai_128B: LDrivv_template<VecDblRegs128B>,
+      Requires<[HasV60T,UseHVXDbl]>;
+def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B>,
+      Requires<[HasV60T,UseHVXDbl]>;
+multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+  def : Pat<(VTSgl (alignedload I32:$addr)),
+            (PS_vloadrw_ai I32:$addr, 0)>,
+        Requires<[UseHVXSgl]>;
+  def : Pat<(VTSgl (unalignedload I32:$addr)),
+            (PS_vloadrwu_ai I32:$addr, 0)>,
+        Requires<[UseHVXSgl]>;
+  def : Pat<(VTDbl (alignedload I32:$addr)),
+            (PS_vloadrw_ai_128B I32:$addr, 0)>,
+        Requires<[UseHVXDbl]>;
+  def : Pat<(VTDbl (unalignedload I32:$addr)),
+            (PS_vloadrwu_ai_128B I32:$addr, 0)>,
+        Requires<[UseHVXDbl]>;
+}
+defm : LDrivv_pats <v128i8, v256i8>;
+defm : LDrivv_pats <v64i16, v128i16>;
+defm : LDrivv_pats <v32i32, v64i32>;
+defm : LDrivv_pats <v16i64, v32i64>;
 // Store vector predicate pseudo.
 let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
     isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
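
One subtlety in the block above: the new patterns no longer match a bare (store ...) or (load ...). The alignedstore/unalignedstore and alignedload/unalignedload PatFrags defined at the top of this file split the cases by comparing the access's alignment against its size, roughly like this C++ rendering of the predicate (assumed shape, not copied from the backend):

    // An "aligned" store has at least natural alignment for the stored
    // type; anything less must use the V6_vS32Ub/PS_*u unaligned forms.
    bool isAlignedStore(const StoreSDNode *ST) {
      return ST->getAlignment() >= ST->getMemoryVT().getStoreSize();
    }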
@@ -970,64 +984,6 @@ def LDriq_pred_vec_V6_128B : LDInst<(outs VectorRegs128B:$dst),
              Requires<[HasV60T,UseHVXDbl]>;
 }
-// Store vector pseudo.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
-    isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
-  def STriv_pseudo_V6 : STInst<(outs),
-             (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
-             ".error \"should not emit\" ",
-             []>,
-             Requires<[HasV60T,UseHVXSgl]>;
-  def STriv_pseudo_V6_128B : STInst<(outs),
-             (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
-             ".error \"should not emit\" ",
-             []>,
-             Requires<[HasV60T,UseHVXDbl]>;
-}
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
-    isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
-  def STrivv_pseudo_V6 : STInst<(outs),
-             (ins IntRegs:$base, s32Imm:$offset, VecDblRegs:$src1),
-             ".error \"should not emit\" ",
-             []>,
-             Requires<[HasV60T,UseHVXSgl]>;
-  def STrivv_pseudo_V6_128B : STInst<(outs),
-             (ins IntRegs:$base, s32Imm:$offset, VecDblRegs128B:$src1),
-             ".error \"should not emit\" ",
-             []>,
-             Requires<[HasV60T,UseHVXDbl]>;
-}
-// Load vector pseudo.
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
-    opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
-  def LDriv_pseudo_V6 : LDInst<(outs VectorRegs:$dst),
-             (ins IntRegs:$base, s32Imm:$offset),
-             ".error \"should not emit\" ",
-             []>,
-             Requires<[HasV60T,UseHVXSgl]>;
-  def LDriv_pseudo_V6_128B : LDInst<(outs VectorRegs128B:$dst),
-             (ins IntRegs:$base, s32Imm:$offset),
-             ".error \"should not emit\" ",
-             []>,
-             Requires<[HasV60T,UseHVXDbl]>;
-}
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
-    opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
-  def LDrivv_pseudo_V6 : LDInst<(outs VecDblRegs:$dst),
-             (ins IntRegs:$base, s32Imm:$offset),
-             ".error \"should not emit\" ",
-             []>,
-             Requires<[HasV60T,UseHVXSgl]>;
-  def LDrivv_pseudo_V6_128B : LDInst<(outs VecDblRegs128B:$dst),
-             (ins IntRegs:$base, s32Imm:$offset),
-             ".error \"should not emit\" ",
-             []>,
-             Requires<[HasV60T,UseHVXDbl]>;
-}
 class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
               string cstr = "", InstrItinClass itin = CVI_VA_DV,
               IType type = TypeCVI_VA_DV>
@@ -1061,6 +1017,7 @@ def: VSelPat<v32i32, VectorRegs128B, PS_vselect_128B>,
 def: VSelPat<v64i32, VecDblRegs128B, PS_wselect_128B>,
     Requires<[HasV60T,UseHVXDbl]>;
+let hasNewValue = 1 in
 class T_vmpy <string asmString, RegisterClass RCout, RegisterClass RCin>
   : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2),