diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index f8095e95ddc..90883212a27 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -557,6 +557,52 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {
   return true;
 }
 
+/// Conservatively decide whether two registers hold the same value.
+///
+/// \return true only when \p TReg and \p FReg provably carry the same value.
+static bool hasSameValue(const MachineRegisterInfo &MRI,
+                         const TargetInstrInfo *TII, Register TReg,
+                         Register FReg) {
+  // Trivial case: both inputs name the very same register.
+  if (TReg == FReg)
+    return true;
+
+  // Anything else is decided from the unique definition of each register,
+  // which is only looked up for virtual registers.
+  if (!(TReg.isVirtual() && FReg.isVirtual()))
+    return false;
+  const MachineInstr *DefOfT = MRI.getUniqueVRegDef(TReg);
+  const MachineInstr *DefOfF = MRI.getUniqueVRegDef(FReg);
+  if (DefOfT == nullptr || DefOfF == nullptr)
+    return false;
+
+  // Unmodeled side effects rule out any reasoning about the result.
+  if (DefOfT->hasUnmodeledSideEffects())
+    return false;
+
+  // A memory access may be separated from its twin by a store, so only a
+  // dereferenceable invariant load is acceptable here.
+  const bool AccessesMemory = DefOfT->mayLoadOrStore();
+  if (AccessesMemory && !DefOfT->isDereferenceableInvariantLoad(nullptr))
+    return false;
+
+  // Physical register inputs (e.g. a COPY from a physreg) may have been
+  // rewritten between the two definitions, so their values cannot be trusted.
+  for (const MachineOperand &MO : DefOfT->uses())
+    if (MO.isReg() && MO.getReg().isPhysical())
+      return false;
+
+  // The target must agree that both instructions compute the same value(s).
+  if (!TII->produceSameValue(*DefOfT, *DefOfF, &MRI))
+    return false;
+
+  // Finally, the registers must be written by corresponding def operands of
+  // their instructions; an operand index of -1 is rejected outright.
+  const int TIdx = DefOfT->findRegisterDefOperandIdx(TReg);
+  const int FIdx = DefOfF->findRegisterDefOperandIdx(FReg);
+  return TIdx != -1 && FIdx != -1 && TIdx == FIdx;
+}
+
 /// replacePHIInstrs - Completely replace PHI instructions with selects.
/// This is possible when the only Tail predecessors are the if-converted /// blocks. @@ -571,7 +617,15 @@ void SSAIfConv::replacePHIInstrs() { PHIInfo &PI = PHIs[i]; LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); Register DstReg = PI.PHI->getOperand(0).getReg(); - TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) { + // We do not need the select instruction if both incoming values are + // equal, but we do need a COPY. + BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg) + .addReg(PI.TReg); + } else { + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, + PI.FReg); + } LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); PI.PHI->eraseFromParent(); PI.PHI = nullptr; @@ -592,7 +646,7 @@ void SSAIfConv::rewritePHIOperands() { unsigned DstReg = 0; LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); - if (PI.TReg == PI.FReg) { + if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) { // We do not need the select instruction if both incoming values are // equal. 
DstReg = PI.TReg;
diff --git a/test/CodeGen/AArch64/early-ifcvt-same-value.mir b/test/CodeGen/AArch64/early-ifcvt-same-value.mir
new file mode 100644
index 00000000000..b9298608e19
--- /dev/null
+++ b/test/CodeGen/AArch64/early-ifcvt-same-value.mir
@@ -0,0 +1,261 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -run-pass=early-ifcvt -stress-early-ifcvt -verify-machineinstrs %s -o - | FileCheck %s
+
+# fmov0: both PHI inputs come from separate but identical FMOVS0 instructions.
+# The assertions expect the PHI to become a COPY of the first FMOVS0 result.
+---
+name: fmov0
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr32, preferred-register: '' }
+  - { id: 1, class: fpr32, preferred-register: '' }
+  - { id: 2, class: fpr32, preferred-register: '' }
+  - { id: 3, class: fpr32, preferred-register: '' }
+  - { id: 4, class: fpr32, preferred-register: '' }
+  - { id: 5, class: gpr32common, preferred-register: '' }
+  - { id: 6, class: gpr32, preferred-register: '' }
+  - { id: 7, class: fpr32, preferred-register: '' }
+  - { id: 8, class: fpr32, preferred-register: '' }
+liveins:
+  - { reg: '$s1', virtual-reg: '%4' }
+  - { reg: '$w0', virtual-reg: '%5' }
+body: |
+  ; CHECK-LABEL: name: fmov0
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $s1, $w0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+  ; CHECK:   [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+  ; CHECK:   [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK:   [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK:   [[COPY2:%[0-9]+]]:fpr32 = COPY [[FMOVS0_]]
+  ; CHECK:   $s0 = COPY [[COPY2]]
+  ; CHECK:   RET_ReallyLR implicit $s0
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $s1, $w0
+
+    %5:gpr32common = COPY $w0
+    %4:fpr32 = COPY $s1
+    %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.3
+
+    %0:fpr32 = FMOVS0
+    B %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    %1:fpr32 = FMOVS0
+
+  bb.3:
+    %2:fpr32 = PHI %1, %bb.2, %0, %bb.1
+    $s0 = COPY %2
+    RET_ReallyLR implicit $s0
+
+...
+# fmov0_extrapred: like fmov0, but the tail block has a third predecessor, so
+# a PHI remains. The assertions expect it to take the first FMOVS0 result
+# directly as its operand for the converted path.
+---
+name: fmov0_extrapred
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr32, preferred-register: '' }
+  - { id: 1, class: fpr32, preferred-register: '' }
+  - { id: 2, class: fpr32, preferred-register: '' }
+  - { id: 3, class: fpr32, preferred-register: '' }
+  - { id: 4, class: fpr32, preferred-register: '' }
+  - { id: 5, class: gpr32common, preferred-register: '' }
+  - { id: 6, class: gpr32, preferred-register: '' }
+  - { id: 7, class: fpr32, preferred-register: '' }
+  - { id: 8, class: fpr32, preferred-register: '' }
+liveins:
+  - { reg: '$s1', virtual-reg: '%4' }
+  - { reg: '$w0', virtual-reg: '%5' }
+body: |
+  ; CHECK-LABEL: name: fmov0_extrapred
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   liveins: $s1, $w0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+  ; CHECK:   [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+  ; CHECK:   [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK:   [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0
+  ; CHECK:   B %bb.4
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   [[DEF:%[0-9]+]]:fpr32 = IMPLICIT_DEF
+  ; CHECK:   B %bb.4
+  ; CHECK: bb.4:
+  ; CHECK:   [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.0, [[DEF]], %bb.1
+  ; CHECK:   $s0 = COPY [[PHI]]
+  ; CHECK:   RET_ReallyLR implicit $s0
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $s1, $w0
+
+    %5:gpr32common = COPY $w0
+    %4:fpr32 = COPY $s1
+    %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.4:
+    successors: %bb.3
+
+    ; Make sure we also handle the case when there are extra predecessors on
+    ; the tail block.
+    %3:fpr32 = IMPLICIT_DEF
+    B %bb.3
+
+  bb.1:
+    successors: %bb.3
+
+    %0:fpr32 = FMOVS0
+    B %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    %1:fpr32 = FMOVS0
+
+  bb.3:
+    %2:fpr32 = PHI %1, %bb.2, %0, %bb.1, %3, %bb.4
+    $s0 = COPY %2
+    RET_ReallyLR implicit $s0
+
+...
+# copy_physreg: both PHI inputs are COPYs from $s1, which is redefined in each
+# branch. The assertions expect a select (FCSELSrrr) between the two COPYs.
+---
+name: copy_physreg
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr32, preferred-register: '' }
+  - { id: 1, class: fpr32, preferred-register: '' }
+  - { id: 2, class: fpr32, preferred-register: '' }
+  - { id: 3, class: fpr32, preferred-register: '' }
+  - { id: 4, class: fpr32, preferred-register: '' }
+  - { id: 5, class: gpr32common, preferred-register: '' }
+  - { id: 6, class: gpr32, preferred-register: '' }
+  - { id: 7, class: fpr32, preferred-register: '' }
+  - { id: 8, class: fpr32, preferred-register: '' }
+  - { id: 9, class: fpr32, preferred-register: '' }
+  - { id: 10, class: fpr32, preferred-register: '' }
+liveins:
+  - { reg: '$s1', virtual-reg: '%4' }
+  - { reg: '$w0', virtual-reg: '%5' }
+body: |
+  ; CHECK-LABEL: name: copy_physreg
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $s1, $w0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+  ; CHECK:   [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+  ; CHECK:   [[DEF:%[0-9]+]]:fpr32 = IMPLICIT_DEF implicit-def $s1
+  ; CHECK:   [[COPY2:%[0-9]+]]:fpr32 = COPY $s1
+  ; CHECK:   [[DEF1:%[0-9]+]]:fpr32 = IMPLICIT_DEF implicit-def $s1
+  ; CHECK:   [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
+  ; CHECK:   [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY2]], [[COPY3]], 1, implicit $nzcv
+  ; CHECK:   $s0 = COPY [[FCSELSrrr]]
+  ; CHECK:   RET_ReallyLR implicit $s0
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $s1, $w0
+
+    %5:gpr32common = COPY $w0
+    %4:fpr32 = COPY $s1
+    %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.3
+
+    %9:fpr32 = IMPLICIT_DEF implicit-def $s1
+    %0:fpr32 = COPY $s1
+    B %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    %10:fpr32 = IMPLICIT_DEF implicit-def $s1
+    %1:fpr32 = COPY $s1
+
+  bb.3:
+    %2:fpr32 = PHI %1, %bb.2, %0, %bb.1
+    $s0 = COPY %2
+    RET_ReallyLR implicit $s0
+
+...
+# same_def_different_operand: the PHI inputs (%11 and %10) are two different
+# results of one LDRXpre. The assertions expect a select (CSELXr) between them.
+# NOTE(review): %9 is declared as the virtual register for live-in $x2 but is
+# copied from $x0 in the body - confirm this is intentional.
+---
+name: same_def_different_operand
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: fpr32, preferred-register: '' }
+  - { id: 1, class: fpr32, preferred-register: '' }
+  - { id: 2, class: gpr64common, preferred-register: '' }
+  - { id: 3, class: fpr32, preferred-register: '' }
+  - { id: 4, class: fpr32, preferred-register: '' }
+  - { id: 5, class: gpr32common, preferred-register: '' }
+  - { id: 6, class: gpr32, preferred-register: '' }
+  - { id: 7, class: fpr32, preferred-register: '' }
+  - { id: 8, class: fpr32, preferred-register: '' }
+  - { id: 9, class: gpr64common, preferred-register: '' }
+  - { id: 10, class: gpr64, preferred-register: '' }
+  - { id: 11, class: gpr64common, preferred-register: '' }
+liveins:
+  - { reg: '$s1', virtual-reg: '%4' }
+  - { reg: '$w0', virtual-reg: '%5' }
+  - { reg: '$x2', virtual-reg: '%9' }
+body: |
+  ; CHECK-LABEL: name: same_def_different_operand
+  ; CHECK: bb.0.entry:
+  ; CHECK:   liveins: $s1, $w0, $x2
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK:   early-clobber %11:gpr64common, %10:gpr64 = LDRXpre [[COPY]], 16
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32common = COPY $w0
+  ; CHECK:   [[COPY2:%[0-9]+]]:fpr32 = COPY $s1
+  ; CHECK:   [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 1, 0, implicit-def $nzcv
+  ; CHECK:   [[CSELXr:%[0-9]+]]:gpr64common = CSELXr %11, %10, 1, implicit $nzcv
+  ; CHECK:   $x2 = COPY [[CSELXr]]
+  ; CHECK:   RET_ReallyLR implicit $x2
+  bb.0.entry:
+    successors: %bb.1, %bb.2
+    liveins: $s1, $w0, $x2
+
+    %9:gpr64common = COPY $x0
+    early-clobber %11:gpr64common, %10:gpr64 = LDRXpre %9:gpr64common, 16
+
+    %5:gpr32common = COPY $w0
+    %4:fpr32 = COPY $s1
+    %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.3
+
+    B %bb.3
+
+  bb.2:
+    successors: %bb.3
+
+    B %bb.3
+
+  bb.3:
+    %2:gpr64common = PHI %11, %bb.2, %10, %bb.1
+    $x2 = COPY %2
+    RET_ReallyLR implicit $x2
+
+...
diff --git a/test/CodeGen/PowerPC/expand-foldable-isel.ll b/test/CodeGen/PowerPC/expand-foldable-isel.ll index b82a31c2213..834d986cce0 100644 --- a/test/CodeGen/PowerPC/expand-foldable-isel.ll +++ b/test/CodeGen/PowerPC/expand-foldable-isel.ll @@ -34,28 +34,27 @@ define void @_ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot ; CHECK-GEN-ISEL-TRUE-NEXT: std r0, 16(r1) ; CHECK-GEN-ISEL-TRUE-NEXT: stdu r1, -64(r1) ; CHECK-GEN-ISEL-TRUE-NEXT: mr r30, r3 -; CHECK-GEN-ISEL-TRUE-NEXT: # implicit-def: $x4 +; CHECK-GEN-ISEL-TRUE-NEXT: # implicit-def: $x3 ; CHECK-GEN-ISEL-TRUE-NEXT: # implicit-def: $r29 ; CHECK-GEN-ISEL-TRUE-NEXT: .p2align 4 ; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_1: # %while.cond11 ; CHECK-GEN-ISEL-TRUE-NEXT: # -; CHECK-GEN-ISEL-TRUE-NEXT: lwz r3, 0(r3) -; CHECK-GEN-ISEL-TRUE-NEXT: cmplwi r3, 0 +; CHECK-GEN-ISEL-TRUE-NEXT: lwz r4, 0(r3) +; CHECK-GEN-ISEL-TRUE-NEXT: cmplwi r4, 0 ; CHECK-GEN-ISEL-TRUE-NEXT: beq cr0, .LBB0_3 ; CHECK-GEN-ISEL-TRUE-NEXT: # %bb.2: # %while.body21 ; CHECK-GEN-ISEL-TRUE-NEXT: # ; CHECK-GEN-ISEL-TRUE-NEXT: bl ZN3pov10pov_callocEmmPKciS1_pov ; CHECK-GEN-ISEL-TRUE-NEXT: nop ; CHECK-GEN-ISEL-TRUE-NEXT: addi r4, r29, 1 -; CHECK-GEN-ISEL-TRUE-NEXT: srwi r6, r29, 1 +; CHECK-GEN-ISEL-TRUE-NEXT: srwi r5, r29, 1 ; CHECK-GEN-ISEL-TRUE-NEXT: srawi r4, r4, 1 ; CHECK-GEN-ISEL-TRUE-NEXT: std r3, 0(r3) -; CHECK-GEN-ISEL-TRUE-NEXT: addze r5, r4 -; CHECK-GEN-ISEL-TRUE-NEXT: mr r4, r3 -; CHECK-GEN-ISEL-TRUE-NEXT: isel r29, r5, r6, 4*cr5+lt +; CHECK-GEN-ISEL-TRUE-NEXT: addze r4, r4 +; CHECK-GEN-ISEL-TRUE-NEXT: isel r29, r4, r5, 4*cr5+lt ; CHECK-GEN-ISEL-TRUE-NEXT: b .LBB0_1 ; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_3: # %lor.rhs -; CHECK-GEN-ISEL-TRUE-NEXT: std r30, 16(r4) +; CHECK-GEN-ISEL-TRUE-NEXT: std r30, 16(r3) ; CHECK-GEN-ISEL-TRUE-NEXT: addi r1, r1, 64 ; CHECK-GEN-ISEL-TRUE-NEXT: ld r0, 16(r1) ; CHECK-GEN-ISEL-TRUE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -75,37 +74,36 @@ define void 
@_ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -64(r1) ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: # implicit-def: $x4 +; CHECK-NEXT: # implicit-def: $x3 ; CHECK-NEXT: # implicit-def: $r29 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %while.cond11 ; CHECK-NEXT: # -; CHECK-NEXT: lwz r3, 0(r3) -; CHECK-NEXT: cmplwi r3, 0 +; CHECK-NEXT: lwz r4, 0(r3) +; CHECK-NEXT: cmplwi r4, 0 ; CHECK-NEXT: beq cr0, .LBB0_6 ; CHECK-NEXT: # %bb.2: # %while.body21 ; CHECK-NEXT: # ; CHECK-NEXT: bl ZN3pov10pov_callocEmmPKciS1_pov ; CHECK-NEXT: nop ; CHECK-NEXT: addi r4, r29, 1 -; CHECK-NEXT: srwi r6, r29, 1 +; CHECK-NEXT: srwi r5, r29, 1 ; CHECK-NEXT: srawi r4, r4, 1 ; CHECK-NEXT: std r3, 0(r3) -; CHECK-NEXT: addze r5, r4 -; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: addze r4, r4 ; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_4 ; CHECK-NEXT: # %bb.3: # %while.body21 ; CHECK-NEXT: # -; CHECK-NEXT: ori r29, r6, 0 +; CHECK-NEXT: ori r29, r5, 0 ; CHECK-NEXT: b .LBB0_5 ; CHECK-NEXT: .LBB0_4: # %while.body21 ; CHECK-NEXT: # -; CHECK-NEXT: addi r29, r5, 0 +; CHECK-NEXT: addi r29, r4, 0 ; CHECK-NEXT: .LBB0_5: # %while.body21 ; CHECK-NEXT: # ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_6: # %lor.rhs -; CHECK-NEXT: std r30, 16(r4) +; CHECK-NEXT: std r30, 16(r3) ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload