From 738d110269a3426f38daa507b62f048b821e00f4 Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Thu, 29 Apr 2021 08:40:41 -0700 Subject: [PATCH] [EarlyIfConversion] Avoid producing selects with identical operands This extends the early-ifcvt pass to avoid a few more cases where the resulting select instructions would have matching operands. Additionally, we now use TII to determine "sameness" of the operands so that as TII gets smarter, so too will ifcvt. The attached test case was bugpoint-reduced down from CINT2000/252.eon in the test-suite. See: https://clang.godbolt.org/z/WvnrcrGEn Differential Revision: https://reviews.llvm.org/D101508 --- lib/CodeGen/EarlyIfConversion.cpp | 58 +++- .../AArch64/early-ifcvt-same-value.mir | 250 ++++++++++++++++++ test/CodeGen/PowerPC/expand-foldable-isel.ll | 32 ++- 3 files changed, 321 insertions(+), 19 deletions(-) create mode 100644 test/CodeGen/AArch64/early-ifcvt-same-value.mir diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index f8095e95ddc..90883212a27 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -557,6 +557,52 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { return true; } +/// \return true iff TReg and FReg are known to hold the same value. +static bool hasSameValue(const MachineRegisterInfo &MRI, + const TargetInstrInfo *TII, Register TReg, + Register FReg) { + if (TReg == FReg) + return true; + + if (!TReg.isVirtual() || !FReg.isVirtual()) + return false; + + const MachineInstr *TDef = MRI.getUniqueVRegDef(TReg); + const MachineInstr *FDef = MRI.getUniqueVRegDef(FReg); + if (!TDef || !FDef) + return false; + + // If TDef has unmodeled side-effects, all bets are off. + if (TDef->hasUnmodeledSideEffects()) + return false; + + // If TDef may load or store (and is not a dereferenceable invariant load), + // an intervening store could change the value, so don't treat them as equal. 
+ if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad(nullptr)) + return false; + + // We also can't guarantee that they are the same if, for example, the + // instructions are both a copy from a physical reg, because some other + // instruction may have modified the value in that reg between the two + // defining insts. + if (any_of(TDef->uses(), [](const MachineOperand &MO) { + return MO.isReg() && MO.getReg().isPhysical(); + })) + return false; + + // Ask TII whether the two defining instructions produce the same value(s). + if (!TII->produceSameValue(*TDef, *FDef, &MRI)) + return false; + + // Further, check that TReg and FReg are defined by corresponding operands. + int TIdx = TDef->findRegisterDefOperandIdx(TReg); + int FIdx = FDef->findRegisterDefOperandIdx(FReg); + if (TIdx == -1 || FIdx == -1) + return false; + + return TIdx == FIdx; +} + /// replacePHIInstrs - Completely replace PHI instructions with selects. /// This is possible when the only Tail predecessors are the if-converted /// blocks. @@ -571,7 +617,15 @@ void SSAIfConv::replacePHIInstrs() { PHIInfo &PI = PHIs[i]; LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); Register DstReg = PI.PHI->getOperand(0).getReg(); - TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) { + // We do not need the select instruction if both incoming values are + // equal, but we do need a COPY. 
+ BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg) + .addReg(PI.TReg); + } else { + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, + PI.FReg); + } LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); PI.PHI->eraseFromParent(); PI.PHI = nullptr; @@ -592,7 +646,7 @@ void SSAIfConv::rewritePHIOperands() { unsigned DstReg = 0; LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); - if (PI.TReg == PI.FReg) { + if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) { // We do not need the select instruction if both incoming values are // equal. DstReg = PI.TReg; diff --git a/test/CodeGen/AArch64/early-ifcvt-same-value.mir b/test/CodeGen/AArch64/early-ifcvt-same-value.mir new file mode 100644 index 00000000000..b9298608e19 --- /dev/null +++ b/test/CodeGen/AArch64/early-ifcvt-same-value.mir @@ -0,0 +1,250 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-- -run-pass=early-ifcvt -stress-early-ifcvt -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: fmov0 +tracksRegLiveness: true +registers: + - { id: 0, class: fpr32, preferred-register: '' } + - { id: 1, class: fpr32, preferred-register: '' } + - { id: 2, class: fpr32, preferred-register: '' } + - { id: 3, class: fpr32, preferred-register: '' } + - { id: 4, class: fpr32, preferred-register: '' } + - { id: 5, class: gpr32common, preferred-register: '' } + - { id: 6, class: gpr32, preferred-register: '' } + - { id: 7, class: fpr32, preferred-register: '' } + - { id: 8, class: fpr32, preferred-register: '' } +liveins: + - { reg: '$s1', virtual-reg: '%4' } + - { reg: '$w0', virtual-reg: '%5' } +body: | + ; CHECK-LABEL: name: fmov0 + ; CHECK: bb.0.entry: + ; CHECK: liveins: $s1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv + ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: 
[[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[FMOVS0_]] + ; CHECK: $s0 = COPY [[COPY2]] + ; CHECK: RET_ReallyLR implicit $s0 + bb.0.entry: + successors: %bb.1, %bb.2 + liveins: $s1, $w0 + + %5:gpr32common = COPY $w0 + %4:fpr32 = COPY $s1 + %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + successors: %bb.3 + + %0:fpr32 = FMOVS0 + B %bb.3 + + bb.2: + successors: %bb.3 + + %1:fpr32 = FMOVS0 + + bb.3: + %2:fpr32 = PHI %1, %bb.2, %0, %bb.1 + $s0 = COPY %2 + RET_ReallyLR implicit $s0 + +... +--- +name: fmov0_extrapred +tracksRegLiveness: true +registers: + - { id: 0, class: fpr32, preferred-register: '' } + - { id: 1, class: fpr32, preferred-register: '' } + - { id: 2, class: fpr32, preferred-register: '' } + - { id: 3, class: fpr32, preferred-register: '' } + - { id: 4, class: fpr32, preferred-register: '' } + - { id: 5, class: gpr32common, preferred-register: '' } + - { id: 6, class: gpr32, preferred-register: '' } + - { id: 7, class: fpr32, preferred-register: '' } + - { id: 8, class: fpr32, preferred-register: '' } +liveins: + - { reg: '$s1', virtual-reg: '%4' } + - { reg: '$w0', virtual-reg: '%5' } +body: | + ; CHECK-LABEL: name: fmov0_extrapred + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: liveins: $s1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv + ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0 + ; CHECK: B %bb.4 + ; CHECK: bb.1: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: [[DEF:%[0-9]+]]:fpr32 = IMPLICIT_DEF + ; CHECK: B %bb.4 + ; CHECK: bb.4: + ; CHECK: [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.0, [[DEF]], %bb.1 + ; CHECK: $s0 = COPY [[PHI]] + ; CHECK: RET_ReallyLR implicit $s0 + bb.0.entry: + successors: %bb.1, %bb.2 + liveins: $s1, $w0 + + %5:gpr32common = 
COPY $w0 + %4:fpr32 = COPY $s1 + %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit $nzcv + B %bb.1 + + bb.4: + successors: %bb.3 + + ; Make sure we also handle the case when there are extra predecessors on + ; the tail block. + %3:fpr32 = IMPLICIT_DEF + B %bb.3 + + bb.1: + successors: %bb.3 + + %0:fpr32 = FMOVS0 + B %bb.3 + + bb.2: + successors: %bb.3 + + %1:fpr32 = FMOVS0 + + bb.3: + %2:fpr32 = PHI %1, %bb.2, %0, %bb.1, %3, %bb.4 + $s0 = COPY %2 + RET_ReallyLR implicit $s0 + +... +--- +name: copy_physreg +tracksRegLiveness: true +registers: + - { id: 0, class: fpr32, preferred-register: '' } + - { id: 1, class: fpr32, preferred-register: '' } + - { id: 2, class: fpr32, preferred-register: '' } + - { id: 3, class: fpr32, preferred-register: '' } + - { id: 4, class: fpr32, preferred-register: '' } + - { id: 5, class: gpr32common, preferred-register: '' } + - { id: 6, class: gpr32, preferred-register: '' } + - { id: 7, class: fpr32, preferred-register: '' } + - { id: 8, class: fpr32, preferred-register: '' } + - { id: 9, class: fpr32, preferred-register: '' } + - { id: 10, class: fpr32, preferred-register: '' } +liveins: + - { reg: '$s1', virtual-reg: '%4' } + - { reg: '$w0', virtual-reg: '%5' } +body: | + ; CHECK-LABEL: name: copy_physreg + ; CHECK: bb.0.entry: + ; CHECK: liveins: $s1, $w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv + ; CHECK: [[DEF:%[0-9]+]]:fpr32 = IMPLICIT_DEF implicit-def $s1 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[DEF1:%[0-9]+]]:fpr32 = IMPLICIT_DEF implicit-def $s1 + ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY2]], [[COPY3]], 1, implicit $nzcv + ; CHECK: $s0 = COPY [[FCSELSrrr]] + ; CHECK: RET_ReallyLR implicit $s0 + bb.0.entry: + successors: %bb.1, %bb.2 + liveins: $s1, $w0 + + %5:gpr32common = COPY $w0 + 
%4:fpr32 = COPY $s1 + %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + successors: %bb.3 + + %9:fpr32 = IMPLICIT_DEF implicit-def $s1 + %0:fpr32 = COPY $s1 + B %bb.3 + + bb.2: + successors: %bb.3 + + %10:fpr32 = IMPLICIT_DEF implicit-def $s1 + %1:fpr32 = COPY $s1 + + bb.3: + %2:fpr32 = PHI %1, %bb.2, %0, %bb.1 + $s0 = COPY %2 + RET_ReallyLR implicit $s0 + +... +--- +name: same_def_different_operand +tracksRegLiveness: true +registers: + - { id: 0, class: fpr32, preferred-register: '' } + - { id: 1, class: fpr32, preferred-register: '' } + - { id: 2, class: gpr64common, preferred-register: '' } + - { id: 3, class: fpr32, preferred-register: '' } + - { id: 4, class: fpr32, preferred-register: '' } + - { id: 5, class: gpr32common, preferred-register: '' } + - { id: 6, class: gpr32, preferred-register: '' } + - { id: 7, class: fpr32, preferred-register: '' } + - { id: 8, class: fpr32, preferred-register: '' } + - { id: 9, class: gpr64common, preferred-register: '' } + - { id: 10, class: gpr64, preferred-register: '' } + - { id: 11, class: gpr64common, preferred-register: '' } +liveins: + - { reg: '$s1', virtual-reg: '%4' } + - { reg: '$w0', virtual-reg: '%5' } + - { reg: '$x2', virtual-reg: '%9' } +body: | + ; CHECK-LABEL: name: same_def_different_operand + ; CHECK: bb.0.entry: + ; CHECK: liveins: $s1, $w0, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: early-clobber %11:gpr64common, %10:gpr64 = LDRXpre [[COPY]], 16 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32common = COPY $w0 + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 1, 0, implicit-def $nzcv + ; CHECK: [[CSELXr:%[0-9]+]]:gpr64common = CSELXr %11, %10, 1, implicit $nzcv + ; CHECK: $x2 = COPY [[CSELXr]] + ; CHECK: RET_ReallyLR implicit $x2 + bb.0.entry: + successors: %bb.1, %bb.2 + liveins: $s1, $w0, $x2 + + %9:gpr64common = COPY $x0 + early-clobber %11:gpr64common, %10:gpr64 = LDRXpre 
%9:gpr64common, 16 + + %5:gpr32common = COPY $w0 + %4:fpr32 = COPY $s1 + %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv + Bcc 1, %bb.2, implicit $nzcv + B %bb.1 + + bb.1: + successors: %bb.3 + + B %bb.3 + + bb.2: + successors: %bb.3 + + B %bb.3 + + bb.3: + %2:gpr64common = PHI %11, %bb.2, %10, %bb.1 + $x2 = COPY %2 + RET_ReallyLR implicit $x2 + +... diff --git a/test/CodeGen/PowerPC/expand-foldable-isel.ll b/test/CodeGen/PowerPC/expand-foldable-isel.ll index b82a31c2213..834d986cce0 100644 --- a/test/CodeGen/PowerPC/expand-foldable-isel.ll +++ b/test/CodeGen/PowerPC/expand-foldable-isel.ll @@ -34,28 +34,27 @@ define void @_ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot ; CHECK-GEN-ISEL-TRUE-NEXT: std r0, 16(r1) ; CHECK-GEN-ISEL-TRUE-NEXT: stdu r1, -64(r1) ; CHECK-GEN-ISEL-TRUE-NEXT: mr r30, r3 -; CHECK-GEN-ISEL-TRUE-NEXT: # implicit-def: $x4 +; CHECK-GEN-ISEL-TRUE-NEXT: # implicit-def: $x3 ; CHECK-GEN-ISEL-TRUE-NEXT: # implicit-def: $r29 ; CHECK-GEN-ISEL-TRUE-NEXT: .p2align 4 ; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_1: # %while.cond11 ; CHECK-GEN-ISEL-TRUE-NEXT: # -; CHECK-GEN-ISEL-TRUE-NEXT: lwz r3, 0(r3) -; CHECK-GEN-ISEL-TRUE-NEXT: cmplwi r3, 0 +; CHECK-GEN-ISEL-TRUE-NEXT: lwz r4, 0(r3) +; CHECK-GEN-ISEL-TRUE-NEXT: cmplwi r4, 0 ; CHECK-GEN-ISEL-TRUE-NEXT: beq cr0, .LBB0_3 ; CHECK-GEN-ISEL-TRUE-NEXT: # %bb.2: # %while.body21 ; CHECK-GEN-ISEL-TRUE-NEXT: # ; CHECK-GEN-ISEL-TRUE-NEXT: bl ZN3pov10pov_callocEmmPKciS1_pov ; CHECK-GEN-ISEL-TRUE-NEXT: nop ; CHECK-GEN-ISEL-TRUE-NEXT: addi r4, r29, 1 -; CHECK-GEN-ISEL-TRUE-NEXT: srwi r6, r29, 1 +; CHECK-GEN-ISEL-TRUE-NEXT: srwi r5, r29, 1 ; CHECK-GEN-ISEL-TRUE-NEXT: srawi r4, r4, 1 ; CHECK-GEN-ISEL-TRUE-NEXT: std r3, 0(r3) -; CHECK-GEN-ISEL-TRUE-NEXT: addze r5, r4 -; CHECK-GEN-ISEL-TRUE-NEXT: mr r4, r3 -; CHECK-GEN-ISEL-TRUE-NEXT: isel r29, r5, r6, 4*cr5+lt +; CHECK-GEN-ISEL-TRUE-NEXT: addze r4, r4 +; CHECK-GEN-ISEL-TRUE-NEXT: isel r29, r4, r5, 4*cr5+lt ; CHECK-GEN-ISEL-TRUE-NEXT: b .LBB0_1 ; 
CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_3: # %lor.rhs -; CHECK-GEN-ISEL-TRUE-NEXT: std r30, 16(r4) +; CHECK-GEN-ISEL-TRUE-NEXT: std r30, 16(r3) ; CHECK-GEN-ISEL-TRUE-NEXT: addi r1, r1, 64 ; CHECK-GEN-ISEL-TRUE-NEXT: ld r0, 16(r1) ; CHECK-GEN-ISEL-TRUE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -75,37 +74,36 @@ define void @_ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -64(r1) ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: # implicit-def: $x4 +; CHECK-NEXT: # implicit-def: $x3 ; CHECK-NEXT: # implicit-def: $r29 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %while.cond11 ; CHECK-NEXT: # -; CHECK-NEXT: lwz r3, 0(r3) -; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: lwz r4, 0(r3) +; CHECK-NEXT: cmplwi r4, 0 ; CHECK-NEXT: beq cr0, .LBB0_6 ; CHECK-NEXT: # %bb.2: # %while.body21 ; CHECK-NEXT: # ; CHECK-NEXT: bl ZN3pov10pov_callocEmmPKciS1_pov ; CHECK-NEXT: nop ; CHECK-NEXT: addi r4, r29, 1 -; CHECK-NEXT: srwi r6, r29, 1 +; CHECK-NEXT: srwi r5, r29, 1 ; CHECK-NEXT: srawi r4, r4, 1 ; CHECK-NEXT: std r3, 0(r3) -; CHECK-NEXT: addze r5, r4 -; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: addze r4, r4 ; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_4 ; CHECK-NEXT: # %bb.3: # %while.body21 ; CHECK-NEXT: # -; CHECK-NEXT: ori r29, r6, 0 +; CHECK-NEXT: ori r29, r5, 0 ; CHECK-NEXT: b .LBB0_5 ; CHECK-NEXT: .LBB0_4: # %while.body21 ; CHECK-NEXT: # -; CHECK-NEXT: addi r29, r5, 0 +; CHECK-NEXT: addi r29, r4, 0 ; CHECK-NEXT: .LBB0_5: # %while.body21 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_6: # %lor.rhs -; CHECK-NEXT: std r30, 16(r4) +; CHECK-NEXT: std r30, 16(r3) ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload