From 83ca134320f4be47a7c8bd79e262adb151aba597 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Fri, 17 Jan 2020 14:34:26 -0800 Subject: [PATCH] [AArch64] Don't generate gpr CSEL instructions in early-ifcvt if regclasses aren't compatible. In GlobalISel we may in some unfortunate circumstances generate PHIs with operands that are on separate banks. If-conversion doesn't currently check for that case and ends up generating a CSEL on AArch64 with incorrect register operands. Differential Revision: https://reviews.llvm.org/D72961 --- include/llvm/CodeGen/TargetInstrInfo.h | 8 +- lib/CodeGen/EarlyIfConversion.cpp | 5 +- lib/Target/AArch64/AArch64InstrInfo.cpp | 11 +- lib/Target/AArch64/AArch64InstrInfo.h | 3 +- lib/Target/AMDGPU/SIInstrInfo.cpp | 4 +- lib/Target/AMDGPU/SIInstrInfo.h | 5 +- lib/Target/PowerPC/PPCInstrInfo.cpp | 7 +- lib/Target/PowerPC/PPCInstrInfo.h | 3 +- lib/Target/SystemZ/SystemZInstrInfo.cpp | 5 +- lib/Target/SystemZ/SystemZInstrInfo.h | 5 +- lib/Target/X86/X86InstrInfo.cpp | 10 +- lib/Target/X86/X86InstrInfo.h | 3 +- .../AArch64/early-ifcvt-regclass-mismatch.mir | 171 ++++++++++++++++++ 13 files changed, 213 insertions(+), 27 deletions(-) create mode 100644 test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir diff --git a/include/llvm/CodeGen/TargetInstrInfo.h b/include/llvm/CodeGen/TargetInstrInfo.h index 536fd248909..4d1dfe1b0c6 100644 --- a/include/llvm/CodeGen/TargetInstrInfo.h +++ b/include/llvm/CodeGen/TargetInstrInfo.h @@ -838,15 +838,17 @@ public: /// /// @param MBB Block where select instruction would be inserted. /// @param Cond Condition returned by analyzeBranch. + /// @param DstReg Virtual dest register that the result should write to. /// @param TrueReg Virtual register to select when Cond is true. /// @param FalseReg Virtual register to select when Cond is false. /// @param CondCycles Latency from Cond+Branch to select output. /// @param TrueCycles Latency from TrueReg to select output. /// @param FalseCycles Latency from FalseReg to select output. virtual bool canInsertSelect(const MachineBasicBlock &MBB, - ArrayRef Cond, unsigned TrueReg, - unsigned FalseReg, int &CondCycles, - int &TrueCycles, int &FalseCycles) const { + ArrayRef Cond, unsigned DstReg, + unsigned TrueReg, unsigned FalseReg, + int &CondCycles, int &TrueCycles, + int &FalseCycles) const { return false; } diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index fc31a9449e7..a67072ce340 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -520,8 +520,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI"); // Get target information. - if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, - PI.CondCycles, PI.TCycles, PI.FCycles)) { + if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(), + PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles, + PI.FCycles)) { LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI); return false; } diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 0ed2a678c4f..c07fb3a44b6 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -496,8 +496,9 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB, ArrayRef Cond, - unsigned TrueReg, unsigned FalseReg, - int &CondCycles, int &TrueCycles, + unsigned DstReg, unsigned TrueReg, + unsigned FalseReg, int &CondCycles, + int &TrueCycles, int &FalseCycles) const { // Check register classes. const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -506,6 +507,12 @@ bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB, if (!RC) return false; + // Also need to check the dest regclass, in case we're trying to optimize + // something like: + // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2 + if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg))) + return false; + // Expanding cbz/tbz requires an extra cycle of latency on the condition. unsigned ExtraCondLat = Cond.size() != 1; diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 66e517e5490..f041624225e 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -191,7 +191,8 @@ public: bool reverseBranchCondition(SmallVectorImpl &Cond) const override; bool canInsertSelect(const MachineBasicBlock &, ArrayRef Cond, - unsigned, unsigned, int &, int &, int &) const override; + unsigned, unsigned, unsigned, int &, int &, + int &) const override; void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DstReg, ArrayRef Cond, unsigned TrueReg, diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index e1ee5b721d5..c2ac27553ae 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2128,8 +2128,8 @@ bool SIInstrInfo::reverseBranchCondition( bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, ArrayRef Cond, - unsigned TrueReg, unsigned FalseReg, - int &CondCycles, + unsigned DstReg, unsigned TrueReg, + unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { switch (Cond[0].getImm()) { case VCCNZ: diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index b151a94b0d1..d988a00686c 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -293,9 +293,8 @@ public: SmallVectorImpl &Cond) const override; bool canInsertSelect(const MachineBasicBlock &MBB, - ArrayRef Cond, - unsigned TrueReg, unsigned FalseReg, - int &CondCycles, + ArrayRef Cond, unsigned DstReg, + unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override; void insertSelect(MachineBasicBlock &MBB, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index c8adbbb8704..6b448365215 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -753,9 +753,10 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB, // Select analysis. bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, - ArrayRef Cond, - unsigned TrueReg, unsigned FalseReg, - int &CondCycles, int &TrueCycles, int &FalseCycles) const { + ArrayRef Cond, + unsigned DstReg, unsigned TrueReg, + unsigned FalseReg, int &CondCycles, + int &TrueCycles, int &FalseCycles) const { if (Cond.size() != 2) return false; diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 21da3457dc9..a549b8221a7 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -273,7 +273,8 @@ public: // Select analysis. bool canInsertSelect(const MachineBasicBlock &, ArrayRef Cond, - unsigned, unsigned, int &, int &, int &) const override; + unsigned, unsigned, unsigned, int &, int &, + int &) const override; void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DstReg, ArrayRef Cond, unsigned TrueReg, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 97c8fa7aa32..191792ad41e 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -532,8 +532,9 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, ArrayRef Pred, - unsigned TrueReg, unsigned FalseReg, - int &CondCycles, int &TrueCycles, + unsigned DstReg, unsigned TrueReg, + unsigned FalseReg, int &CondCycles, + int &TrueCycles, int &FalseCycles) const { // Not all subtargets have LOCR instructions. if (!STI.hasLoadStoreOnCond()) diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 8391970c7d9..c3daf9e9c62 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -221,8 +221,9 @@ public: int *BytesAdded = nullptr) const override; bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const override; - bool canInsertSelect(const MachineBasicBlock&, ArrayRef Cond, - unsigned, unsigned, int&, int&, int&) const override; + bool canInsertSelect(const MachineBasicBlock &, ArrayRef Cond, + unsigned, unsigned, unsigned, int &, int &, + int &) const override; void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DstReg, ArrayRef Cond, unsigned TrueReg, diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 245346d8273..6cefb15fe9c 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2826,11 +2826,11 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB, return Count; } -bool X86InstrInfo:: -canInsertSelect(const MachineBasicBlock &MBB, - ArrayRef Cond, - unsigned TrueReg, unsigned FalseReg, - int &CondCycles, int &TrueCycles, int &FalseCycles) const { +bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB, + ArrayRef Cond, + unsigned DstReg, unsigned TrueReg, + unsigned FalseReg, int &CondCycles, + int &TrueCycles, int &FalseCycles) const { // Not all subtargets have cmov instructions. if (!Subtarget.hasCMov()) return false; diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 1d2da530535..7c7124383f1 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -306,7 +306,8 @@ public: const DebugLoc &DL, int *BytesAdded = nullptr) const override; bool canInsertSelect(const MachineBasicBlock &, ArrayRef Cond, - unsigned, unsigned, int &, int &, int &) const override; + unsigned, unsigned, unsigned, int &, int &, + int &) const override; void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DstReg, ArrayRef Cond, unsigned TrueReg, diff --git a/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir b/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir new file mode 100644 index 00000000000..436baae1a2b --- /dev/null +++ b/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir @@ -0,0 +1,171 @@ +# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=early-ifcvt -verify-machineinstrs %s -o - | FileCheck %s +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "arm64-apple-ios13.3.0" + define hidden void @phi_operands_regclasses_different() #0 { + entry: + br i1 undef, label %if.then139.i, label %if.else142.i + + if.then139.i: ; preds = %entry + %0 = load double, double* undef, align 8 + br label %if.end161.i + + if.else142.i: ; preds = %entry + %tobool154.i = icmp eq i8 undef, 0 + br i1 %tobool154.i, label %if.then155.i, label %if.end161.i + + if.then155.i: ; preds = %if.else142.i + %add158.i = fadd double undef, 0.000000e+00 + br label %if.end161.i + + if.end161.i: ; preds = %if.then155.i, %if.else142.i, %if.then139.i + %y2.0.i = phi double [ %0, %if.then139.i ], [ 0.000000e+00, %if.else142.i ], [ 0.000000e+00, %if.then155.i ] + %add169.i = fadd double 0.000000e+00, %y2.0.i + %1 = fmul double undef, %add169.i + %add174.i = fsub double undef, %1 + %2 = call double @llvm.fabs.f64(double %add174.i) #2 + %cmp.i516.i = fcmp olt double %2, 0x3BC79CA10C924223 + br i1 %cmp.i516.i, label %if.then.i.i, label %if.end9.i.i + + if.then.i.i: ; preds = %if.end161.i + %3 = call double @llvm.fabs.f64(double undef) #2 + unreachable + + if.end9.i.i: ; preds = %if.end161.i + ret void + } + declare double @llvm.fabs.f64(double) #1 + declare void @llvm.stackprotector(i8*, i8**) #2 + + attributes #0 = { "target-cpu"="apple-a7" } + attributes #1 = { nounwind readnone speculatable willreturn } + attributes #2 = { nounwind } + + !llvm.ident = !{!0} + + !0 = !{!"clang"} + +... +--- +name: phi_operands_regclasses_different +alignment: 4 +exposesReturnsTwice: false +legalized: true +regBankSelected: true +selected: true +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gpr32, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: _, preferred-register: '' } + - { id: 3, class: gpr, preferred-register: '' } + - { id: 4, class: gpr64, preferred-register: '' } + - { id: 5, class: fpr, preferred-register: '' } + - { id: 6, class: _, preferred-register: '' } + - { id: 7, class: gpr64, preferred-register: '' } + - { id: 8, class: gpr64common, preferred-register: '' } + - { id: 9, class: gpr64, preferred-register: '' } + - { id: 10, class: fpr64, preferred-register: '' } + - { id: 11, class: fpr64, preferred-register: '' } + - { id: 12, class: fpr, preferred-register: '' } + - { id: 13, class: fpr64, preferred-register: '' } + - { id: 14, class: fpr, preferred-register: '' } + - { id: 15, class: gpr32, preferred-register: '' } + - { id: 16, class: _, preferred-register: '' } + - { id: 17, class: gpr32, preferred-register: '' } + - { id: 18, class: gpr32, preferred-register: '' } + - { id: 19, class: gpr32, preferred-register: '' } + - { id: 20, class: gpr, preferred-register: '' } + - { id: 21, class: fpr64, preferred-register: '' } + - { id: 22, class: fpr64, preferred-register: '' } + - { id: 23, class: fpr64, preferred-register: '' } + - { id: 24, class: fpr64, preferred-register: '' } + - { id: 25, class: fpr64, preferred-register: '' } + - { id: 26, class: fpr64, preferred-register: '' } + - { id: 27, class: fpr64, preferred-register: '' } + - { id: 28, class: gpr64, preferred-register: '' } +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; Here we check that we don't ifcvt to a CSEL that uses GPRs, because + ; some operands to the PHI have the fpr64 regclass. + ; CHECK-LABEL: name: phi_operands_regclasses_different + ; CHECK-NOT: CSELXr + bb.1.entry: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + %0:gpr32 = IMPLICIT_DEF + %4:gpr64 = IMPLICIT_DEF + %8:gpr64common = IMPLICIT_DEF + TBNZW %0, 0, %bb.2 + B %bb.3 + + bb.2.if.then139.i: + successors: %bb.5(0x80000000) + + %7:gpr64 = LDRXui %8, 0 :: (load 8 from `double* undef`) + B %bb.5 + + bb.3.if.else142.i: + successors: %bb.4(0x40000000), %bb.5(0x40000000) + + %26:fpr64 = FMOVD0 + %19:gpr32 = COPY $wzr + CBNZW %19, %bb.5 + + bb.4.if.then155.i: + successors: %bb.5(0x80000000) + + %27:fpr64 = FMOVD0 + + bb.5.if.end161.i: + successors: %bb.6(0x40000000), %bb.7(0x40000000) + + %9:gpr64 = PHI %7, %bb.2, %26, %bb.3, %27, %bb.4 + %21:fpr64 = COPY %9 + %25:fpr64 = FMOVD0 + %10:fpr64 = FADDDrr %25, %21 + %22:fpr64 = COPY %4 + %11:fpr64 = FMULDrr %22, %10 + %23:fpr64 = COPY %4 + %13:fpr64 = FABD64 %23, %11 + %28:gpr64 = MOVi64imm 4307583784117748259 + %24:fpr64 = COPY %28 + FCMPDrr %13, %24, implicit-def $nzcv + %17:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv + TBNZW %17, 0, %bb.6 + B %bb.7 + + bb.6.if.then.i.i: + successors: + + + bb.7.if.end9.i.i: + RET_ReallyLR + +...