From af6f88a0b500156149a7367aad220b59c1b0bbfc Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 25 Mar 2019 22:42:30 +0000 Subject: [PATCH] [ARM] Add missing memory operands to a bunch of instructions. This should hopefully lead to minor improvements in code generation, and more accurate spill/reload comments in assembly. Also fix isLoadFromStackSlotPostFE/isStoreToStackSlotPostFE so they don't lead to misleading assembly comments for merged memory operands; this is technically orthogonal, but in practice the relevant memory operand lists don't show up without this change. Differential Revision: https://reviews.llvm.org/D59713 llvm-svn: 356963 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 6 +- lib/Target/ARM/ARMISelLowering.cpp | 12 +++- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 66 +++++++++++++------ .../Thumb2/umulo-128-legalisation-lowering.ll | 10 +-- 4 files changed, 64 insertions(+), 30 deletions(-) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 12a2d7a1d5e..bb222edbcef 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1176,7 +1176,8 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI, unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const { SmallVector Accesses; - if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses)) { + if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) && + Accesses.size() == 1) { FrameIndex = cast(Accesses.front()->getPseudoValue()) ->getFrameIndex(); @@ -1396,7 +1397,8 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const { SmallVector Accesses; - if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses)) { + if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) && + Accesses.size() == 1) { FrameIndex = cast(Accesses.front()->getPseudoValue()) ->getFrameIndex(); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 36df387cd1b..0a1289b12aa 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -9015,18 +9015,23 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, if (Align == 0) Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); + MachineMemOperand *CPMMO = + MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF), + MachineMemOperand::MOLoad, 4, 4); if (IsThumb) BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)) .addReg(varEnd, RegState::Define) .addConstantPoolIndex(Idx) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .addMemOperand(CPMMO); else BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)) .addReg(varEnd, RegState::Define) .addConstantPoolIndex(Idx) .addImm(0) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .addMemOperand(CPMMO); } BB->addSuccessor(loopMBB); @@ -9274,7 +9279,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, .add(MI.getOperand(2)) // Rn .add(MI.getOperand(3)) // PredImm .add(MI.getOperand(4)) // PredReg - .add(MI.getOperand(0)); // Rt + .add(MI.getOperand(0)) // Rt + .cloneMemRefs(MI); MI.eraseFromParent(); return BB; } diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 13b5445eaba..21aa3e0ab34 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -173,12 +173,14 @@ namespace { MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, - ArrayRef> Regs); + ArrayRef> Regs, + ArrayRef Instrs); MachineInstr *CreateLoadStoreDouble( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, - ArrayRef> Regs) const; + ArrayRef> Regs, + ArrayRef Instrs) const; void FormCandidates(const MemOpQueue &MemOps); MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, @@ -622,7 +624,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, - ArrayRef> Regs) { + ArrayRef> Regs, + ArrayRef Instrs) { unsigned NumRegs = Regs.size(); assert(NumRegs > 1); @@ -814,6 +817,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( for (const std::pair &R : Regs) MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second)); + MIB.cloneMergedMemRefs(Instrs); + return MIB.getInstr(); } @@ -821,7 +826,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble( MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL, - ArrayRef> Regs) const { + ArrayRef> Regs, + ArrayRef Instrs) const { bool IsLoad = isi32Load(Opcode); assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8; @@ -837,6 +843,7 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble( .addReg(Regs[1].first, getKillRegState(Regs[1].second)); } MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + MIB.cloneMergedMemRefs(Instrs); return MIB.getInstr(); } @@ -894,10 +901,11 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { MachineInstr *Merged = nullptr; if (Cand.CanMergeToLSDouble) Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill, - Opcode, Pred, PredReg, DL, Regs); + Opcode, Pred, PredReg, DL, Regs, + Cand.Instrs); if (!Merged && Cand.CanMergeToLSMulti) Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill, - Opcode, Pred, PredReg, DL, Regs); + Opcode, Pred, PredReg, DL, Regs, Cand.Instrs); if (!Merged) return nullptr; @@ -1435,14 +1443,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { .addReg(Base, getKillRegState(isLd ? BaseKill : false)) .addImm(Pred).addReg(PredReg) .addReg(MO.getReg(), (isLd ? getDefRegState(true) : - getKillRegState(MO.isKill()))); + getKillRegState(MO.isKill()))) + .cloneMemRefs(*MI); } else if (isLd) { if (isAM2) { // LDR_PRE, LDR_POST if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) - .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg) + .cloneMemRefs(*MI); } else { int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) @@ -1450,7 +1460,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { .addReg(Base) .addReg(0) .addImm(Imm) - .add(predOps(Pred, PredReg)); + .add(predOps(Pred, PredReg)) + .cloneMemRefs(*MI); } } else { // t2LDR_PRE, t2LDR_POST @@ -1458,7 +1469,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { .addReg(Base, RegState::Define) .addReg(Base) .addImm(Offset) - .add(predOps(Pred, PredReg)); + .add(predOps(Pred, PredReg)) + .cloneMemRefs(*MI); } } else { MachineOperand &MO = MI->getOperand(0); @@ -1473,14 +1485,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { .addReg(Base) .addReg(0) .addImm(Imm) - .add(predOps(Pred, PredReg)); + .add(predOps(Pred, PredReg)) + .cloneMemRefs(*MI); } else { // t2STR_PRE, t2STR_POST BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) .addReg(Base) .addImm(Offset) - .add(predOps(Pred, PredReg)); + .add(predOps(Pred, PredReg)) + .cloneMemRefs(*MI); } } MBB.erase(MBBI); @@ -1540,7 +1554,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { // Transfer implicit operands. for (const MachineOperand &MO : MI.implicit_operands()) MIB.add(MO); - MIB.setMemRefs(MI.memoperands()); + MIB.cloneMemRefs(MI); MBB.erase(MBBI); return true; @@ -1608,19 +1622,26 @@ static void InsertLDR_STR(MachineBasicBlock &MBB, bool isDef, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, - unsigned PredReg, const TargetInstrInfo *TII) { + unsigned PredReg, const TargetInstrInfo *TII, + MachineInstr *MI) { if (isDef) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); + // FIXME: This is overly conservative; the new instruction accesses 4 + // bytes, not 8. + MIB.cloneMemRefs(*MI); } else { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); + // FIXME: This is overly conservative; the new instruction accesses 4 + // bytes, not 8. + MIB.cloneMemRefs(*MI); } } @@ -1678,7 +1699,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, .addReg(BaseReg, getKillRegState(BaseKill)) .addImm(Pred).addReg(PredReg) .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) - .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); + .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)) + .cloneMemRefs(*MI); ++NumLDRD2LDM; } else { BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) @@ -1687,7 +1709,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, .addReg(EvenReg, getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) .addReg(OddReg, - getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); + getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)) + .cloneMemRefs(*MI); ++NumSTRD2STM; } } else { @@ -1705,9 +1728,10 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) { assert(!TRI->regsOverlap(OddReg, BaseReg)); InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill, - false, BaseReg, false, BaseUndef, Pred, PredReg, TII); + false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI); InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill, - false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII); + false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII, + MI); } else { if (OddReg == EvenReg && EvenDeadKill) { // If the two source operands are the same, the kill marker is @@ -1720,9 +1744,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, if (EvenReg == BaseReg) EvenDeadKill = false; InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill, - EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII); + EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII, + MI); InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill, - OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII); + OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII, + MI); } if (isLd) ++NumLDRD2LDR; diff --git a/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll index 5300bed0de8..ac1c814b838 100644 --- a/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll +++ b/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll @@ -8,10 +8,10 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; THUMBV7-NEXT: .pad #44 ; THUMBV7-NEXT: sub sp, #44 -; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill -; THUMBV7-NEXT: movs r0, #0 ; THUMBV7-NEXT: ldrd r4, r7, [sp, #88] ; THUMBV7-NEXT: mov r5, r3 +; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill +; THUMBV7-NEXT: movs r0, #0 ; THUMBV7-NEXT: strd r4, r7, [sp] ; THUMBV7-NEXT: mov r1, r3 ; THUMBV7-NEXT: strd r0, r0, [sp, #8] @@ -20,8 +20,8 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV7-NEXT: movs r2, #0 ; THUMBV7-NEXT: movs r3, #0 ; THUMBV7-NEXT: bl __multi3 -; THUMBV7-NEXT: strd r1, r0, [sp, #32] -; THUMBV7-NEXT: strd r3, r2, [sp, #24] +; THUMBV7-NEXT: strd r1, r0, [sp, #32] @ 8-byte Folded Spill +; THUMBV7-NEXT: strd r3, r2, [sp, #24] @ 8-byte Folded Spill ; THUMBV7-NEXT: ldrd r2, r0, [sp, #96] ; THUMBV7-NEXT: ldr.w r9, [sp, #80] ; THUMBV7-NEXT: umull lr, r0, r0, r6 @@ -47,7 +47,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV7-NEXT: adds r3, r3, r6 ; THUMBV7-NEXT: ldr r6, [sp, #24] @ 4-byte Reload ; THUMBV7-NEXT: adcs r2, r6 -; THUMBV7-NEXT: ldrd r6, lr, [sp, #36] +; THUMBV7-NEXT: ldrd r6, lr, [sp, #36] @ 8-byte Folded Reload ; THUMBV7-NEXT: str.w r6, [lr] ; THUMBV7-NEXT: adc r8, r8, #0 ; THUMBV7-NEXT: ldr r6, [sp, #32] @ 4-byte Reload