From a42126004267170878d05d7e3564aae05b0a9023 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin
Date: Wed, 20 Jan 2021 11:38:05 +0100
Subject: [PATCH] [AMDGPU][GlobalISel] Avoid selecting S_PACK with constants

If constants are hidden behind G_ANYEXT we can treat them the same way
as G_SEXT. For that purpose we extend getConstantVRegValWithLookThrough
with an option to handle G_ANYEXT the same way as G_SEXT.

Differential Revision: https://reviews.llvm.org/D92219
---
 include/llvm/CodeGen/GlobalISel/Utils.h          |  5 ++++-
 lib/CodeGen/GlobalISel/Utils.cpp                 |  7 ++++++-
 lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp  |  6 ++++--
 .../inst-select-build-vector-trunc.v2s16.mir     |  6 ++----
 .../AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll       | 16 ++++------------
 5 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/include/llvm/CodeGen/GlobalISel/Utils.h b/include/llvm/CodeGen/GlobalISel/Utils.h
index ed75cde6f31..68553ab5b1a 100644
--- a/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -146,10 +146,13 @@ struct ValueAndVReg {
 /// When \p LookThroughInstrs == false this function behaves like
 /// getConstantVRegVal.
 /// When \p HandleFConstants == false the function bails on G_FCONSTANTs.
+/// When \p LookThroughAnyExt == true the function treats G_ANYEXT same as
+/// G_SEXT.
 Optional<ValueAndVReg>
 getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI,
                                   bool LookThroughInstrs = true,
-                                  bool HandleFConstants = true);
+                                  bool HandleFConstants = true,
+                                  bool LookThroughAnyExt = false);
 
 const ConstantFP* getConstantFPVRegVal(Register VReg,
                                        const MachineRegisterInfo &MRI);
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index 868385c2def..cd248322448 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -283,7 +283,7 @@ Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg,
 
 Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
     Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
-    bool HandleFConstant) {
+    bool HandleFConstant, bool LookThroughAnyExt) {
   SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
   MachineInstr *MI;
   auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
@@ -310,6 +310,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
   while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) &&
          LookThroughInstrs) {
     switch (MI->getOpcode()) {
+    case TargetOpcode::G_ANYEXT:
+      if (!LookThroughAnyExt)
+        return None;
+      LLVM_FALLTHROUGH;
     case TargetOpcode::G_TRUNC:
     case TargetOpcode::G_SEXT:
     case TargetOpcode::G_ZEXT:
@@ -343,6 +347,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
     case TargetOpcode::G_TRUNC:
       Val = Val.trunc(OpcodeAndSize.second);
       break;
+    case TargetOpcode::G_ANYEXT:
     case TargetOpcode::G_SEXT:
       Val = Val.sext(OpcodeAndSize.second);
       break;
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index c6ae4a1a27e..2fae3d69a70 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -597,9 +597,11 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
   const DebugLoc &DL = MI.getDebugLoc();
   MachineBasicBlock *BB = MI.getParent();
 
-  auto ConstSrc1 = getConstantVRegValWithLookThrough(Src1, *MRI, true, true);
+  auto ConstSrc1 =
+      getConstantVRegValWithLookThrough(Src1, *MRI, true, true, true);
   if (ConstSrc1) {
-    auto ConstSrc0 = getConstantVRegValWithLookThrough(Src0, *MRI, true, true);
+    auto ConstSrc0 =
+        getConstantVRegValWithLookThrough(Src0, *MRI, true, true, true);
     if (ConstSrc0) {
       const int64_t K0 = ConstSrc0->Value.getSExtValue();
       const int64_t K1 = ConstSrc1->Value.getSExtValue();
diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir
index 056ea79a989..ccd6fd71b3d 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-build-vector-trunc.v2s16.mir
@@ -579,10 +579,8 @@ body: |
   bb.0:
 
     ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_anyext_constant_anyext_constant
-    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
-    ; GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 456
-    ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[S_MOV_B32_1]]
-    ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
+    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 29884539
+    ; GFX9: S_ENDPGM 0, implicit [[S_MOV_B32_]]
     %0:sgpr(s16) = G_CONSTANT i16 123
     %1:sgpr(s16) = G_CONSTANT i16 456
     %2:sgpr(s32) = G_ANYEXT %0
diff --git a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
index c087cd0bc8b..150f17f909a 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
+++ b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
@@ -116,18 +116,14 @@ define float @v_fdot2_inline_literal_a(<2 x half> %b, float %c) {
 ; GFX906-LABEL: v_fdot2_inline_literal_a:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT:    s_movk_i32 s4, 0x4000
-; GFX906-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
-; GFX906-NEXT:    v_dot2_f32_f16 v0, s4, v0, v1
+; GFX906-NEXT:    v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_fdot2_inline_literal_a:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    s_movk_i32 s4, 0x4000
-; GFX10-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
-; GFX10-NEXT:    v_dot2_f32_f16 v0, s4, v0, v1
+; GFX10-NEXT:    v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
   ret float %ret
@@ -137,18 +133,14 @@ define float @v_fdot2_inline_literal_b(<2 x half> %a, float %c) {
 ; GFX906-LABEL: v_fdot2_inline_literal_b:
 ; GFX906:       ; %bb.0:
 ; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX906-NEXT:    s_movk_i32 s4, 0x4000
-; GFX906-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
-; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, s4, v1
+; GFX906-NEXT:    v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
 ; GFX906-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_fdot2_inline_literal_b:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    s_movk_i32 s4, 0x4000
-; GFX10-NEXT:    s_pack_ll_b32_b16 s4, s4, s4
-; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, s4, v1
+; GFX10-NEXT:    v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
   ret float %ret
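
For illustration, a standalone C++ sketch of the technique (not LLVM code; the Node type and getConstantWithLookThrough helper below are made up for the example). It recovers a constant through a chain of extend/truncate nodes, treats an any-extend like a sign-extend only when the caller opts in, and then folds the two 16-bit constants from the MIR test above into the single 32-bit immediate 29884539 ((456 << 16) | 123) instead of an S_PACK_LL_B32_B16 of two S_MOV_B32s.

#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// Toy op set mirroring the generic opcodes the utility looks through.
enum class Op { Const, Trunc, SExt, ZExt, AnyExt };

struct Node {
  Op Kind;
  unsigned Bits;           // width of the result in bits
  int64_t Value = 0;       // meaningful only for Op::Const
  const Node *Src = nullptr;
};

// Walk to the underlying constant through ext/trunc nodes, then replay the
// width changes on the value.  An any-extend normally stops the walk; with
// LookThroughAnyExt it is folded exactly like a sign-extend.
std::optional<int64_t> getConstantWithLookThrough(const Node &N,
                                                  bool LookThroughAnyExt) {
  std::vector<const Node *> Seen;
  const Node *Cur = &N;
  while (Cur->Kind != Op::Const) {
    if (Cur->Kind == Op::AnyExt && !LookThroughAnyExt)
      return std::nullopt;
    Seen.push_back(Cur);
    Cur = Cur->Src;
  }

  uint64_t Val = static_cast<uint64_t>(Cur->Value);
  unsigned Width = Cur->Bits;
  auto LowBits = [](unsigned W) {
    return W >= 64 ? ~0ull : ((1ull << W) - 1);
  };
  // Replay from the node closest to the constant outwards.
  for (auto It = Seen.rbegin(); It != Seen.rend(); ++It) {
    const Node *Step = *It;
    switch (Step->Kind) {
    case Op::Trunc:
    case Op::ZExt:
      Val &= LowBits(Step->Kind == Op::Trunc ? Step->Bits : Width);
      break;
    case Op::AnyExt: // treated like SExt once we opted in above
    case Op::SExt:
      if (Val & (1ull << (Width - 1)))  // sign bit of the old width set?
        Val |= ~LowBits(Width);         // smear it upwards
      Val &= LowBits(Step->Bits);
      break;
    case Op::Const:
      break;
    }
    Width = Step->Bits;
  }
  return static_cast<int64_t>(Val);
}

int main() {
  // Two s16 constants hidden behind any-extends to s32, as in the MIR test.
  Node K0{Op::Const, 16, 123};
  Node K1{Op::Const, 16, 456};
  Node A0{Op::AnyExt, 32, 0, &K0};
  Node A1{Op::AnyExt, 32, 0, &K1};

  auto C0 = getConstantWithLookThrough(A0, /*LookThroughAnyExt=*/true);
  auto C1 = getConstantWithLookThrough(A1, /*LookThroughAnyExt=*/true);
  if (C0 && C1) {
    // Fold what would have been a pack of two moves into a single 32-bit
    // immediate: low half from C0, high half from C1.
    uint32_t Packed = (static_cast<uint32_t>(*C1) << 16) |
                      (static_cast<uint32_t>(*C0) & 0xffff);
    std::cout << "packed immediate: " << Packed << "\n";  // prints 29884539
  }
  return 0;
}

Looking through G_ANYEXT is only safe when the caller does not care about the high bits it leaves undefined; selectG_BUILD_VECTOR_TRUNC only packs the low 16 bits of each constant, which is why the new behaviour is opt-in rather than the default.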