diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 085ef067f2d..ead43731809 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1537,15 +1537,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
   if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
       Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
-    bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
-
-    // Don't fold if we are using source modifiers. The new VOP2 instructions
-    // don't have them.
-    if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) ||
-        hasModifiersSet(UseMI, AMDGPU::OpName::src1_modifiers) ||
-        hasModifiersSet(UseMI, AMDGPU::OpName::src2_modifiers)) {
+    // Don't fold if we are using source or output modifiers. The new VOP2
+    // instructions don't have them.
+    if (hasAnyModifiersSet(UseMI))
       return false;
-    }
 
     const MachineOperand &ImmOp = DefMI.getOperand(1);
 
@@ -1558,6 +1553,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
     if (isInlineConstant(UseMI, *Src0, ImmOp))
       return false;
 
+    bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
     MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
     MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
 
@@ -1944,6 +1940,14 @@ bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
   return Mods && Mods->getImm();
 }
 
+bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
+  return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
+         hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
+         hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) ||
+         hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
+         hasModifiersSet(MI, AMDGPU::OpName::omod);
+}
+
 bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
                                   const MachineOperand &MO,
                                   const MCOperandInfo &OpInfo) const {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 34897e90448..73b997df7bd 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -607,6 +607,7 @@ public:
 
   bool hasModifiersSet(const MachineInstr &MI,
                        unsigned OpName) const;
+  bool hasAnyModifiersSet(const MachineInstr &MI) const;
 
   bool verifyInstruction(const MachineInstr &MI,
                         StringRef &ErrInfo) const override;
diff --git a/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir b/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
new file mode 100644
index 00000000000..986c6b296c9
--- /dev/null
+++ b/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir
@@ -0,0 +1,306 @@
+# RUN: llc -march=amdgcn -run-pass peephole-opt -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+--- |
+  define amdgpu_kernel void @no_fold_imm_madak_mac_clamp_f32() #0 {
+    ret void
+  }
+
+  define amdgpu_kernel void @no_fold_imm_madak_mac_omod_f32() #0 {
+    ret void
+  }
+
+  define amdgpu_kernel void @no_fold_imm_madak_mad_clamp_f32() #0 {
+    ret void
+  }
+
+  define amdgpu_kernel void @no_fold_imm_madak_mad_omod_f32() #0 {
+    ret void
+  }
+
+  attributes #0 = { nounwind }
+
+...
+---
+# GCN-LABEL: name: no_fold_imm_madak_mac_clamp_f32
+# GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec
+# GCN-NEXT: %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec
+
+name: no_fold_imm_madak_mac_clamp_f32
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_64_xexec }
+  - { id: 5, class: sreg_64_xexec }
+  - { id: 6, class: sreg_64_xexec }
+  - { id: 7, class: sreg_32 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: sreg_32_xm0 }
+  - { id: 10, class: sreg_64 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sreg_32_xm0 }
+  - { id: 13, class: sgpr_64 }
+  - { id: 14, class: sgpr_128 }
+  - { id: 15, class: sreg_32_xm0 }
+  - { id: 16, class: sreg_64 }
+  - { id: 17, class: sgpr_128 }
+  - { id: 18, class: sgpr_128 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vreg_64 }
+  - { id: 21, class: vgpr_32 }
+  - { id: 22, class: vreg_64 }
+  - { id: 23, class: vgpr_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vgpr_32 }
+  - { id: 26, class: vreg_64 }
+  - { id: 27, class: vgpr_32 }
+  - { id: 28, class: vreg_64 }
+  - { id: 29, class: vreg_64 }
+liveins:
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+  - { reg: '%vgpr0', virtual-reg: '%3' }
+body: |
+  bb.0 (%ir-block.0):
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %27 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+    %28 = REG_SEQUENCE %3, 1, %27, 2
+    %11 = S_MOV_B32 61440
+    %12 = S_MOV_B32 0
+    %13 = REG_SEQUENCE killed %12, 1, killed %11, 2
+    %14 = REG_SEQUENCE killed %5, 17, %13, 18
+    %15 = S_MOV_B32 2
+    %29 = V_LSHL_B64 killed %28, killed %15, implicit %exec
+    %17 = REG_SEQUENCE killed %6, 17, %13, 18
+    %18 = REG_SEQUENCE killed %4, 17, %13, 18
+    %20 = COPY %29
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit %exec
+    %22 = COPY %29
+    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit %exec
+    %23 = V_MOV_B32_e32 1090519040, implicit %exec
+    %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec
+    %26 = COPY %29
+    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit %exec
+    S_ENDPGM
+
+...
+---
+# GCN-LABEL: name: no_fold_imm_madak_mac_omod_f32
+# GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec
+# GCN: %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit %exec
+
+name: no_fold_imm_madak_mac_omod_f32
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_64_xexec }
+  - { id: 5, class: sreg_64_xexec }
+  - { id: 6, class: sreg_64_xexec }
+  - { id: 7, class: sreg_32 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: sreg_32_xm0 }
+  - { id: 10, class: sreg_64 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sreg_32_xm0 }
+  - { id: 13, class: sgpr_64 }
+  - { id: 14, class: sgpr_128 }
+  - { id: 15, class: sreg_32_xm0 }
+  - { id: 16, class: sreg_64 }
+  - { id: 17, class: sgpr_128 }
+  - { id: 18, class: sgpr_128 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vreg_64 }
+  - { id: 21, class: vgpr_32 }
+  - { id: 22, class: vreg_64 }
+  - { id: 23, class: vgpr_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vgpr_32 }
+  - { id: 26, class: vreg_64 }
+  - { id: 27, class: vgpr_32 }
+  - { id: 28, class: vreg_64 }
+  - { id: 29, class: vreg_64 }
+liveins:
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+  - { reg: '%vgpr0', virtual-reg: '%3' }
+body: |
+  bb.0 (%ir-block.0):
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %27 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+    %28 = REG_SEQUENCE %3, 1, %27, 2
+    %11 = S_MOV_B32 61440
+    %12 = S_MOV_B32 0
+    %13 = REG_SEQUENCE killed %12, 1, killed %11, 2
+    %14 = REG_SEQUENCE killed %5, 17, %13, 18
+    %15 = S_MOV_B32 2
+    %29 = V_LSHL_B64 killed %28, killed %15, implicit %exec
+    %17 = REG_SEQUENCE killed %6, 17, %13, 18
+    %18 = REG_SEQUENCE killed %4, 17, %13, 18
+    %20 = COPY %29
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit %exec
+    %22 = COPY %29
+    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit %exec
+    %23 = V_MOV_B32_e32 1090519040, implicit %exec
+    %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit %exec
+    %26 = COPY %29
+    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit %exec
+    S_ENDPGM
+
+...
+---
+# GCN-LABEL: name: no_fold_imm_madak_mad_clamp_f32
+# GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec
+# GCN: %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec
+
+name: no_fold_imm_madak_mad_clamp_f32
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_64_xexec }
+  - { id: 5, class: sreg_64_xexec }
+  - { id: 6, class: sreg_64_xexec }
+  - { id: 7, class: sreg_32 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: sreg_32_xm0 }
+  - { id: 10, class: sreg_64 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sreg_32_xm0 }
+  - { id: 13, class: sgpr_64 }
+  - { id: 14, class: sgpr_128 }
+  - { id: 15, class: sreg_32_xm0 }
+  - { id: 16, class: sreg_64 }
+  - { id: 17, class: sgpr_128 }
+  - { id: 18, class: sgpr_128 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vreg_64 }
+  - { id: 21, class: vgpr_32 }
+  - { id: 22, class: vreg_64 }
+  - { id: 23, class: vgpr_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vgpr_32 }
+  - { id: 26, class: vreg_64 }
+  - { id: 27, class: vgpr_32 }
+  - { id: 28, class: vreg_64 }
+  - { id: 29, class: vreg_64 }
+liveins:
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+  - { reg: '%vgpr0', virtual-reg: '%3' }
+body: |
+  bb.0 (%ir-block.0):
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %27 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+    %28 = REG_SEQUENCE %3, 1, %27, 2
+    %11 = S_MOV_B32 61440
+    %12 = S_MOV_B32 0
+    %13 = REG_SEQUENCE killed %12, 1, killed %11, 2
+    %14 = REG_SEQUENCE killed %5, 17, %13, 18
+    %15 = S_MOV_B32 2
+    %29 = V_LSHL_B64 killed %28, killed %15, implicit %exec
+    %17 = REG_SEQUENCE killed %6, 17, %13, 18
+    %18 = REG_SEQUENCE killed %4, 17, %13, 18
+    %20 = COPY %29
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit %exec
+    %22 = COPY %29
+    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit %exec
+    %23 = V_MOV_B32_e32 1090519040, implicit %exec
+    %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec
+    %26 = COPY %29
+    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit %exec
+    S_ENDPGM
+
+...
+---
+# GCN-LABEL: name: no_fold_imm_madak_mad_omod_f32
+# GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec
+# GCN: %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit %exec
+
+name: no_fold_imm_madak_mad_omod_f32
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sgpr_64 }
+  - { id: 1, class: sreg_32_xm0 }
+  - { id: 2, class: sgpr_32 }
+  - { id: 3, class: vgpr_32 }
+  - { id: 4, class: sreg_64_xexec }
+  - { id: 5, class: sreg_64_xexec }
+  - { id: 6, class: sreg_64_xexec }
+  - { id: 7, class: sreg_32 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: sreg_32_xm0 }
+  - { id: 10, class: sreg_64 }
+  - { id: 11, class: sreg_32_xm0 }
+  - { id: 12, class: sreg_32_xm0 }
+  - { id: 13, class: sgpr_64 }
+  - { id: 14, class: sgpr_128 }
+  - { id: 15, class: sreg_32_xm0 }
+  - { id: 16, class: sreg_64 }
+  - { id: 17, class: sgpr_128 }
+  - { id: 18, class: sgpr_128 }
+  - { id: 19, class: vgpr_32 }
+  - { id: 20, class: vreg_64 }
+  - { id: 21, class: vgpr_32 }
+  - { id: 22, class: vreg_64 }
+  - { id: 23, class: vgpr_32 }
+  - { id: 24, class: vgpr_32 }
+  - { id: 25, class: vgpr_32 }
+  - { id: 26, class: vreg_64 }
+  - { id: 27, class: vgpr_32 }
+  - { id: 28, class: vreg_64 }
+  - { id: 29, class: vreg_64 }
+liveins:
+  - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
+  - { reg: '%vgpr0', virtual-reg: '%3' }
+body: |
+  bb.0 (%ir-block.0):
+    liveins: %sgpr0_sgpr1, %vgpr0
+
+    %3 = COPY %vgpr0
+    %0 = COPY %sgpr0_sgpr1
+    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %6 = S_LOAD_DWORDX2_IMM %0, 13, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+    %27 = V_ASHRREV_I32_e32 31, %3, implicit %exec
+    %28 = REG_SEQUENCE %3, 1, %27, 2
+    %11 = S_MOV_B32 61440
+    %12 = S_MOV_B32 0
+    %13 = REG_SEQUENCE killed %12, 1, killed %11, 2
+    %14 = REG_SEQUENCE killed %5, 17, %13, 18
+    %15 = S_MOV_B32 2
+    %29 = V_LSHL_B64 killed %28, killed %15, implicit %exec
+    %17 = REG_SEQUENCE killed %6, 17, %13, 18
+    %18 = REG_SEQUENCE killed %4, 17, %13, 18
+    %20 = COPY %29
+    %19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, implicit %exec
+    %22 = COPY %29
+    %21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, implicit %exec
+    %23 = V_MOV_B32_e32 1090519040, implicit %exec
+    %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit %exec
+    %26 = COPY %29
+    BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, implicit %exec
+    S_ENDPGM
+
+...