diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 693a41372a6..e8735b31628 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -3190,55 +3190,18 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, return false; } -// This function tries to combine two RLWINMs. We not only perform such -// optimization in SSA, but also after RA, since some RLWINM is generated after -// RA. -bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI, - MachineInstr *&ToErase) const { - bool Is64Bit = false; - switch (MI.getOpcode()) { - case PPC::RLWINM: - case PPC::RLWINM_rec: - break; - case PPC::RLWINM8: - case PPC::RLWINM8_rec: - Is64Bit = true; - break; - default: - return false; - } +bool PPCInstrInfo::combineRLWINM(MachineInstr &MI, + MachineInstr **ToErase) const { MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo(); - Register FoldingReg = MI.getOperand(1).getReg(); - MachineInstr *SrcMI = nullptr; - bool NoUse = false; - if (MRI->isSSA()) { - if (!Register::isVirtualRegister(FoldingReg)) - return false; - SrcMI = MRI->getVRegDef(FoldingReg); - } else { - bool OtherIntermediateUse = false; - SrcMI = getDefMIPostRA(FoldingReg, MI, OtherIntermediateUse); - NoUse = !OtherIntermediateUse && MI.getOperand(1).isKill(); - } - if (!SrcMI) + unsigned FoldingReg = MI.getOperand(1).getReg(); + if (!Register::isVirtualRegister(FoldingReg)) return false; - // TODO: The pairs of RLWINM8(RLWINM) or RLWINM(RLWINM8) never occur before - // RA, but after RA. And We can fold RLWINM8(RLWINM) -> RLWINM8, or - // RLWINM(RLWINM8) -> RLWINM. - switch (SrcMI->getOpcode()) { - case PPC::RLWINM: - case PPC::RLWINM_rec: - if (Is64Bit) - return false; - break; - case PPC::RLWINM8: - case PPC::RLWINM8_rec: - if (!Is64Bit) - return false; - break; - default: + MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg); + if (SrcMI->getOpcode() != PPC::RLWINM && + SrcMI->getOpcode() != PPC::RLWINM_rec && + SrcMI->getOpcode() != PPC::RLWINM8 && + SrcMI->getOpcode() != PPC::RLWINM8_rec) return false; - } assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() && MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() && SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) && @@ -3293,6 +3256,8 @@ bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI, // If final mask is 0, MI result should be 0 too. if (FinalMask.isNullValue()) { + bool Is64Bit = + (MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec); Simplified = true; LLVM_DEBUG(dbgs() << "Replace Instr: "); LLVM_DEBUG(MI.dump()); @@ -3350,15 +3315,14 @@ bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI, LLVM_DEBUG(dbgs() << "To: "); LLVM_DEBUG(MI.dump()); } - if (Simplified && !SrcMI->hasImplicitDef()) { - // If SrcMI has no implicit def, and FoldingReg has no non-debug use or - // its flag is "killed", it's safe to delete SrcMI. Otherwise keep it. - if ((!MRI->isSSA() && NoUse) || - (MRI->isSSA() && MRI->use_nodbg_empty(FoldingReg))) { - ToErase = SrcMI; - LLVM_DEBUG(dbgs() << "Delete dead instruction: "); - LLVM_DEBUG(SrcMI->dump()); - } + if (Simplified & MRI->use_nodbg_empty(FoldingReg) && + !SrcMI->hasImplicitDef()) { + // If FoldingReg has no non-debug use and it has no implicit def (it + // is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI. + // Otherwise keep it. + *ToErase = SrcMI; + LLVM_DEBUG(dbgs() << "Delete dead instruction: "); + LLVM_DEBUG(SrcMI->dump()); } return Simplified; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 0a2564a51fc..f3ada5a0feb 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -564,8 +564,7 @@ public: bool convertToImmediateForm(MachineInstr &MI, MachineInstr **KilledDef = nullptr) const; bool foldFrameOffset(MachineInstr &MI) const; - bool simplifyRotateAndMaskInstr(MachineInstr &MI, - MachineInstr *&ToErase) const; + bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const; bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const; bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const; bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg, diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp index 827d3c4693b..3fd02bc185c 100644 --- a/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -848,7 +848,7 @@ bool PPCMIPeephole::simplifyCode(void) { case PPC::RLWINM_rec: case PPC::RLWINM8: case PPC::RLWINM8_rec: { - Simplified = TII->simplifyRotateAndMaskInstr(MI, ToErase); + Simplified = TII->combineRLWINM(MI, &ToErase); if (Simplified) ++NumRotatesCollapsed; break; diff --git a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp index 8e4a50c1b33..7d1282df369 100644 --- a/lib/Target/PowerPC/PPCPreEmitPeephole.cpp +++ b/lib/Target/PowerPC/PPCPreEmitPeephole.cpp @@ -37,8 +37,6 @@ STATISTIC(NumberOfSelfCopies, "Number of self copy instructions eliminated"); STATISTIC(NumFrameOffFoldInPreEmit, "Number of folding frame offset by using r+r in pre-emit peephole"); -STATISTIC(NumRotateInstrFoldInPreEmit, - "Number of folding Rotate instructions in pre-emit peephole"); static cl::opt EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true), @@ -415,13 +413,6 @@ static bool hasPCRelativeForm(MachineInstr &Use) { LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: "); LLVM_DEBUG(MI.dump()); } - MachineInstr *ToErase = nullptr; - if (TII->simplifyRotateAndMaskInstr(MI, ToErase)) { - Changed = true; - NumRotateInstrFoldInPreEmit++; - if (ToErase) - InstrsToErase.push_back(ToErase); - } } // Eliminate conditional branch based on a constant CR bit by diff --git a/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir b/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir deleted file mode 100644 index b26487e6f13..00000000000 --- a/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir +++ /dev/null @@ -1,104 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -stop-after \ -# RUN: ppc-pre-emit-peephole %s -o - | FileCheck %s - ---- -name: testFoldRLWINM -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINM - ; CHECK: liveins: $r3 - ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r3, 27, 5, 31 - dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... ---- -name: testFoldRLWINMSrcFullMask -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINMSrcFullMask - ; CHECK: liveins: $r3 - ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r3, 27, 0, 31 - dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... ---- -name: testFoldRLWINMSrcWrapped -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINMSrcWrapped - ; CHECK: liveins: $r3 - ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 11, 12, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r3, 27, 30, 10 - dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... ---- -name: testFoldRLWINMToZero -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINMToZero - ; CHECK: liveins: $r3 - ; CHECK: renamable $r3 = LI 0, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r3, 27, 5, 10 - dead renamable $r3 = RLWINM killed renamable $r3, 8, 5, 10, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... ---- -name: testFoldRLWINM_recToZero -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINM_recToZero - ; CHECK: liveins: $r3 - ; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 - $r3 = RLWINM killed $r3, 27, 5, 10 - dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0 - BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 -... ---- -name: testFoldRLWINMoToZeroSrcCanNotBeDeleted -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINMoToZeroSrcCanNotBeDeleted - ; CHECK: liveins: $r3 - ; CHECK: $r3 = RLWINM_rec $r3, 27, 5, 10, implicit-def dead $cr0 - ; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 - $r3 = RLWINM_rec $r3, 27, 5, 10, implicit-def $cr0 - dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0 - BLR8 implicit $lr8, implicit $rm, implicit killed $cr0 -... ---- -name: testFoldRLWINMInvalidMask -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r3 - ; CHECK-LABEL: name: testFoldRLWINMInvalidMask - ; CHECK: liveins: $r3 - ; CHECK: $r3 = RLWINM killed $r3, 20, 5, 31 - ; CHECK: renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3 - ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 - $r3 = RLWINM killed $r3, 20, 5, 31 - dead renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3 - BLR8 implicit $lr8, implicit $rm, implicit killed $x3 -... diff --git a/test/CodeGen/PowerPC/sms-phi-5.ll b/test/CodeGen/PowerPC/sms-phi-5.ll index c147ddf8182..bdc773de8aa 100644 --- a/test/CodeGen/PowerPC/sms-phi-5.ll +++ b/test/CodeGen/PowerPC/sms-phi-5.ll @@ -14,8 +14,9 @@ define void @phi5() unnamed_addr { ; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: lhz 3, 0(3) ; CHECK-NEXT: slwi 3, 3, 15 -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: ori 3, 4, 0 +; CHECK-NEXT: clrlwi 3, 3, 31 +; CHECK-NEXT: rlwinm 4, 3, 31, 17, 31 +; CHECK-NEXT: or 3, 3, 4 ; CHECK-NEXT: rlwimi 3, 3, 15, 0, 16 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: blr diff --git a/test/CodeGen/PowerPC/vsx_builtins.ll b/test/CodeGen/PowerPC/vsx_builtins.ll index b40a84a7e95..0aae50af264 100644 --- a/test/CodeGen/PowerPC/vsx_builtins.ll +++ b/test/CodeGen/PowerPC/vsx_builtins.ll @@ -131,7 +131,8 @@ define i32 @xvtdivdp_shift(<2 x double> %a, <2 x double> %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvtdivdp cr0, v2, v3 ; CHECK-NEXT: mfocrf r3, 128 -; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: srwi r3, r3, 28 +; CHECK-NEXT: rlwinm r3, r3, 28, 31, 31 ; CHECK-NEXT: blr entry: %0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b)