AMDGPU/GlobalISel: query GISelKnownBits instead of the stubbed signBitIsZero helper.

What the hunks below do:
  * Add the llvm/CodeGen/GlobalISel/GISelKnownBits.h include.
  * Delete the file-local signBitIsZero() stub, which unconditionally returned false.
  * Make selectMUBUFScratchOffen and isDSOffsetLegal call
    KnownBits->signBitIsZero(<register>) on the base operand instead.
  * Add two MIR tests whose base address is first masked with 2147483647 (0x7fffffff).
    The check lines expect the constant offset (65535 for the local load, 2047 for the
    private load) to appear as the immediate offset of the selected DS_READ_U8 /
    BUFFER_LOAD_UBYTE_OFFEN instruction.

Reviewer note: this copy was rebuilt from a whitespace-collapsed paste. Leading
indentation follows LLVM formatting; alignment spacing inside a line (for example after
"legalized:") could not be recovered, so apply with whitespace-insensitive matching if a
context line is rejected. The template arguments on selectFlatOffsetImpl<true> and
dyn_cast<const PseudoSourceValue *> were missing from the paste and are restored here;
confirm against the tree.

diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 99fa46eb665..b55ecd6c6c2 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -19,6 +19,7 @@
 #include "AMDGPUTargetMachine.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
@@ -1564,12 +1565,6 @@ AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
   return selectFlatOffsetImpl<true>(Root);
 }
 
-// FIXME: Implement
-static bool signBitIsZero(const MachineOperand &Op,
-                          const MachineRegisterInfo &MRI) {
-  return false;
-}
-
 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
   auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
   return PSV && PSV->isStack();
@@ -1630,7 +1625,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
           RHSDef->getOperand(1).getCImm()->getSExtValue();
       if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) &&
           (!STI.privateMemoryResourceIsRangeChecked() ||
-           signBitIsZero(LHS, MRI))) {
+           KnownBits->signBitIsZero(LHS.getReg()))) {
         if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
           FI = LHSDef->getOperand(1).getIndex();
         else
@@ -1680,7 +1675,7 @@ bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
 
   // On Southern Islands instruction with a negative base value and an offset
   // don't seem to work.
-  return signBitIsZero(Base, MRI);
+  return KnownBits->signBitIsZero(Base.getReg());
 }
 
 InstructionSelector::ComplexRendererFns
diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
index d9a33f0cf0d..f267163206f 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir
@@ -821,6 +821,51 @@ body: |
 
 ---
 
+name: load_local_s32_from_1_gep_65535_known_bits_base_address
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
+    ; GFX6: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX6: $m0 = S_MOV_B32 -1
+    ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+    ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]]
+    ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
+    ; GFX7: liveins: $vgpr0
+    ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
+    ; GFX7: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX7: $m0 = S_MOV_B32 -1
+    ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[V_AND_B32_e64_]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3)
+    ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]]
+    ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535_known_bits_base_address
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
+    ; GFX9: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_AND_B32_e64_]], 65535, 0, implicit $exec :: (load 1, addrspace 3)
+    ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 2147483647
+    %2:vgpr(s32) = G_AND %0, %1
+    %3:vgpr(p3) = G_INTTOPTR %2
+    %4:vgpr(s32) = G_CONSTANT i32 65535
+    %5:vgpr(p3) = G_GEP %3, %4
+    %6:vgpr(s32) = G_LOAD %5 :: (load 1, align 1, addrspace 3)
+    $vgpr0 = COPY %6
+
+...
+
+---
+
 name: load_local_s32_from_1_gep_65536
 legalized: true
 regBankSelected: true
diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
index e969f457fab..dbf552433bd 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
@@ -553,6 +553,46 @@ body: |
 
 ---
 
+name: load_private_s32_from_1_gep_2047_known_bits
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  scratchWaveOffsetReg: $sgpr4
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GFX6-LABEL: name: load_private_s32_from_1_gep_2047_known_bits
+    ; GFX6: liveins: $vgpr0
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
+    ; GFX6: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047_known_bits
+    ; GFX9: liveins: $vgpr0
+    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
+    ; GFX9: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = G_CONSTANT i32 2147483647
+    %2:vgpr(s32) = G_AND %0, %1
+    %3:vgpr(p5) = G_INTTOPTR %2
+    %4:vgpr(s32) = G_CONSTANT i32 2047
+    %5:vgpr(p5) = G_GEP %3, %4
+    %6:vgpr(s32) = G_LOAD %5 :: (load 1, align 1, addrspace 5)
+    $vgpr0 = COPY %6
+
+...
+
+---
+
 name: load_private_s32_from_1_gep_2048
 legalized: true
 regBankSelected: true