mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
[AMDGPU][GlobalISel] Avoid selecting S_PACK with constants
If constants are hidden behind G_ANYEXT, we can treat them the same way as G_SEXT. For that purpose, we extend getConstantVRegValWithLookThrough with an option to handle G_ANYEXT the same way as G_SEXT. Differential Revision: https://reviews.llvm.org/D92219
This commit is contained in:
parent
5edb12bded
commit
a421260042
@ -146,10 +146,13 @@ struct ValueAndVReg {
|
||||
/// When \p LookThroughInstrs == false this function behaves like
|
||||
/// getConstantVRegVal.
|
||||
/// When \p HandleFConstants == false the function bails on G_FCONSTANTs.
|
||||
/// When \p LookThroughAnyExt == true the function treats G_ANYEXT the same as
|
||||
/// G_SEXT.
|
||||
Optional<ValueAndVReg>
|
||||
getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI,
|
||||
bool LookThroughInstrs = true,
|
||||
bool HandleFConstants = true);
|
||||
bool HandleFConstants = true,
|
||||
bool LookThroughAnyExt = false);
|
||||
const ConstantFP* getConstantFPVRegVal(Register VReg,
|
||||
const MachineRegisterInfo &MRI);
|
||||
|
||||
|
@ -283,7 +283,7 @@ Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg,
|
||||
|
||||
Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
|
||||
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
|
||||
bool HandleFConstant) {
|
||||
bool HandleFConstant, bool LookThroughAnyExt) {
|
||||
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
|
||||
MachineInstr *MI;
|
||||
auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
|
||||
@ -310,6 +310,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
|
||||
while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) &&
|
||||
LookThroughInstrs) {
|
||||
switch (MI->getOpcode()) {
|
||||
case TargetOpcode::G_ANYEXT:
|
||||
if (!LookThroughAnyExt)
|
||||
return None;
|
||||
LLVM_FALLTHROUGH;
|
||||
case TargetOpcode::G_TRUNC:
|
||||
case TargetOpcode::G_SEXT:
|
||||
case TargetOpcode::G_ZEXT:
|
||||
@ -343,6 +347,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
|
||||
case TargetOpcode::G_TRUNC:
|
||||
Val = Val.trunc(OpcodeAndSize.second);
|
||||
break;
|
||||
case TargetOpcode::G_ANYEXT:
|
||||
case TargetOpcode::G_SEXT:
|
||||
Val = Val.sext(OpcodeAndSize.second);
|
||||
break;
|
||||
|
@ -597,9 +597,11 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
MachineBasicBlock *BB = MI.getParent();
|
||||
|
||||
auto ConstSrc1 = getConstantVRegValWithLookThrough(Src1, *MRI, true, true);
|
||||
auto ConstSrc1 =
|
||||
getConstantVRegValWithLookThrough(Src1, *MRI, true, true, true);
|
||||
if (ConstSrc1) {
|
||||
auto ConstSrc0 = getConstantVRegValWithLookThrough(Src0, *MRI, true, true);
|
||||
auto ConstSrc0 =
|
||||
getConstantVRegValWithLookThrough(Src0, *MRI, true, true, true);
|
||||
if (ConstSrc0) {
|
||||
const int64_t K0 = ConstSrc0->Value.getSExtValue();
|
||||
const int64_t K1 = ConstSrc1->Value.getSExtValue();
|
||||
|
@ -579,10 +579,8 @@ body: |
|
||||
bb.0:
|
||||
|
||||
; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_anyext_constant_anyext_constant
|
||||
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 123
|
||||
; GFX9: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 456
|
||||
; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[S_MOV_B32_]], [[S_MOV_B32_1]]
|
||||
; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
|
||||
; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 29884539
|
||||
; GFX9: S_ENDPGM 0, implicit [[S_MOV_B32_]]
|
||||
%0:sgpr(s16) = G_CONSTANT i16 123
|
||||
%1:sgpr(s16) = G_CONSTANT i16 456
|
||||
%2:sgpr(s32) = G_ANYEXT %0
|
||||
|
@ -116,18 +116,14 @@ define float @v_fdot2_inline_literal_a(<2 x half> %b, float %c) {
|
||||
; GFX906-LABEL: v_fdot2_inline_literal_a:
|
||||
; GFX906: ; %bb.0:
|
||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX906-NEXT: s_movk_i32 s4, 0x4000
|
||||
; GFX906-NEXT: s_pack_ll_b32_b16 s4, s4, s4
|
||||
; GFX906-NEXT: v_dot2_f32_f16 v0, s4, v0, v1
|
||||
; GFX906-NEXT: v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
|
||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-LABEL: v_fdot2_inline_literal_a:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_movk_i32 s4, 0x4000
|
||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, s4, s4
|
||||
; GFX10-NEXT: v_dot2_f32_f16 v0, s4, v0, v1
|
||||
; GFX10-NEXT: v_dot2_f32_f16 v0, 2.0, v0, v1 op_sel_hi:[0,1,1]
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%ret = tail call float @llvm.amdgcn.fdot2(<2 x half> <half 2.0, half 2.0>, <2 x half> %b, float %c, i1 false)
|
||||
ret float %ret
|
||||
@ -137,18 +133,14 @@ define float @v_fdot2_inline_literal_b(<2 x half> %a, float %c) {
|
||||
; GFX906-LABEL: v_fdot2_inline_literal_b:
|
||||
; GFX906: ; %bb.0:
|
||||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX906-NEXT: s_movk_i32 s4, 0x4000
|
||||
; GFX906-NEXT: s_pack_ll_b32_b16 s4, s4, s4
|
||||
; GFX906-NEXT: v_dot2_f32_f16 v0, v0, s4, v1
|
||||
; GFX906-NEXT: v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
|
||||
; GFX906-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX10-LABEL: v_fdot2_inline_literal_b:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX10-NEXT: s_movk_i32 s4, 0x4000
|
||||
; GFX10-NEXT: s_pack_ll_b32_b16 s4, s4, s4
|
||||
; GFX10-NEXT: v_dot2_f32_f16 v0, v0, s4, v1
|
||||
; GFX10-NEXT: v_dot2_f32_f16 v0, v0, 2.0, v1 op_sel_hi:[1,0,1]
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> <half 2.0, half 2.0>, float %c, i1 false)
|
||||
ret float %ret
|
||||
|
Loading…
Reference in New Issue
Block a user