1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

[AArch64][GlobalISel] Properly implement widening for TB(N)Z

When we have to widen to a 64-bit register, we have to emit a SUBREG_TO_REG.

Add a general-purpose widening helper which emits the correct SUBREG_TO_REG
instruction based off of a desired size and add a testcase.

Also remove some asserts which are technically incorrect in `emitTestBit`.

- p0 doesn't count as a scalar type, so we need to check `!Ty.isVector()`
instead

- Whenever we have a s1, the Size/Bit checks are too conservative, so just
remove them

Replace these asserts with less conservative ones where applicable.

Differential Revision: https://reviews.llvm.org/D74427
This commit is contained in:
Jessica Paquette 2020-02-11 10:22:38 -08:00
parent ed8bd8f4b1
commit 7e17214e5b
3 changed files with 254 additions and 15 deletions

View File

@ -273,6 +273,8 @@ private:
/// new copy.
Register narrowExtendRegIfNeeded(Register ExtReg,
MachineIRBuilder &MIB) const;
/// Widen \p Reg to \p Size bits if its register class requires it.
///
/// \p Reg is returned unchanged when its type is already \p Size bits wide,
/// when it is an s1, or when the minimum register class for its bank is the
/// same at both the current size and \p Size. Otherwise a SUBREG_TO_REG into
/// the wider register class is emitted with \p MIB and the newly defined
/// register is returned.
Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
MachineIRBuilder &MIB) const;
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
@ -1124,26 +1126,25 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
MachineInstr *AArch64InstructionSelector::emitTestBit(
Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const {
MachineRegisterInfo &MRI = *MIB.getMRI();
#ifndef NDEBUG
assert(TestReg.isValid());
assert(ProduceNonFlagSettingCondBr &&
"Cannot emit TB(N)Z with speculation tracking!");
assert(TestReg.isValid());
LLT Ty = MRI.getType(TestReg);
unsigned Size = Ty.getSizeInBits();
assert(Bit < Size &&
"Bit to test must be smaler than the size of a test register!");
assert(Ty.isScalar() && "Expected a scalar!");
assert(Size >= 32 && "Expected at least a 32-bit register!");
#endif
MachineRegisterInfo &MRI = *MIB.getMRI();
// Attempt to optimize the test bit by walking over instructions.
TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
bool UseWReg = Bit < 32;
LLT Ty = MRI.getType(TestReg);
unsigned Size = Ty.getSizeInBits();
assert(!Ty.isVector() && "Expected a scalar!");
assert(Bit < 64 && "Bit is too large!");
// When the test register is a 64-bit register, we have to narrow to make
// TBNZW work.
if (UseWReg)
bool UseWReg = Bit < 32;
unsigned NecessarySize = UseWReg ? 32 : 64;
if (Size < NecessarySize)
TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
else if (Size > NecessarySize)
TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
@ -5154,6 +5155,52 @@ Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
return Copy.getReg(0);
}
/// Return a register holding \p Reg that is suitable for use as a
/// \p WideSize-bit operand, emitting a SUBREG_TO_REG with \p MIB only when the
/// narrow and wide sizes map to different register classes on Reg's bank.
Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
    Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
  assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");

  MachineRegisterInfo &RegInfo = *MIB.getMRI();
  unsigned NarrowSize = MRI_TYPE_SIZE_PLACEHOLDER;
  assert(WideSize >= NarrowSize &&
         "WideSize cannot be smaller than NarrowSize!");

  // Nothing to do when the register already has the requested width.
  if (NarrowSize == WideSize)
    return Reg;

  // An s1 may be selected to a register of any size, so its width is a
  // don't-care and no widening is required.
  if (NarrowSize == 1)
    return Reg;

  // Look up the smallest register class on this bank for each of the two
  // sizes.
  const RegisterBank *Bank = RBI.getRegBank(Reg, RegInfo, TRI);
  const TargetRegisterClass *OrigRC = getMinClassForRegBank(*Bank, NarrowSize);
  const TargetRegisterClass *WideRC = getMinClassForRegBank(*Bank, WideSize);
  assert(OrigRC && "Could not determine narrow RC?");
  assert(WideRC && "Could not determine wide RC?");

  // Different type sizes can still share one register class; e.g. an s8 on
  // the GPR bank is selected to a GPR32 or a GPR64. In that situation no
  // SUBREG_TO_REG is needed. This relies on the s8 always ending up on a
  // GPR32.
  if (OrigRC == WideRC)
    return Reg;

  // The classes differ: find the subregister index that corresponds to the
  // narrow class.
  unsigned SubReg = 0;
  getSubRegForClass(OrigRC, TRI, SubReg);
  assert(SubReg && "Couldn't determine subregister?");

  // Emit the SUBREG_TO_REG defining a fresh register in the wide class, then
  // constrain its operands and hand back the widened register.
  auto WidenMI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {});
  WidenMI.addImm(0);
  WidenMI.addUse(Reg);
  WidenMI.addImm(SubReg);
  constrainSelectedInstRegOperands(*WidenMI, TII, TRI, RBI);
  return WidenMI.getReg(0);
}
/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
InstructionSelector::ComplexRendererFns

View File

@ -78,9 +78,8 @@ body: |
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $h0
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
; CHECK: %copy:gpr32all = COPY [[SUBREG_TO_REG]]
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %copy
; CHECK: TBNZW [[COPY]], 3, %bb.1
; CHECK: %copy:gpr32 = COPY [[SUBREG_TO_REG]]
; CHECK: TBNZW %copy, 3, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR

View File

@ -0,0 +1,193 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
#
# Test widening and narrowing on test bit operations using subregister copies
# or SUBREG_TO_REG.
--- |
@glob = external unnamed_addr global i1, align 4
define void @s1_no_copy() { ret void }
define void @s16_no_copy() { ret void }
define void @p0_no_copy() { ret void }
define void @widen_s32_to_s64() { ret void }
define void @widen_s16_to_s64() { ret void }
define void @narrow_s64_to_s32() { ret void }
...
---
name: s1_no_copy
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: s1_no_copy
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: %narrow:gpr32 = IMPLICIT_DEF
; CHECK: TBNZW %narrow, 0, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
%narrow:gpr(s1) = G_IMPLICIT_DEF
; There should be no copy here, because the s1 can be selected to a GPR32.
G_BRCOND %narrow(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
...
---
name: s16_no_copy
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: s16_no_copy
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: %narrow:gpr32 = IMPLICIT_DEF
; CHECK: TBNZW %narrow, 0, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
%narrow:gpr(s16) = G_IMPLICIT_DEF
%trunc:gpr(s1) = G_TRUNC %narrow(s16)
; Look through the G_TRUNC to get the G_IMPLICIT_DEF. We don't need a
; SUBREG_TO_REG here, because the s16 will end up on a 32-bit register.
G_BRCOND %trunc(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
...
---
name: p0_no_copy
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: p0_no_copy
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: %glob:gpr64common = MOVaddr target-flags(aarch64-page) @glob, target-flags(aarch64-pageoff, aarch64-nc) @glob
; CHECK: %load:gpr32 = LDRBBui %glob, 0 :: (dereferenceable load 1 from @glob, align 4)
; CHECK: TBNZW %load, 0, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
%glob:gpr(p0) = G_GLOBAL_VALUE @glob
%load:gpr(s8) = G_LOAD %glob(p0) :: (dereferenceable load 1 from @glob, align 4)
%trunc:gpr(s1) = G_TRUNC %load(s8)
; Look through G_TRUNC to get the load. The load is into a s8, which will
; be selected to a GPR32, so we don't need a copy.
G_BRCOND %trunc(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
...
---
name: widen_s32_to_s64
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: widen_s32_to_s64
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $w0
; CHECK: %reg:gpr32all = COPY $w0
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
; CHECK: TBZX [[SUBREG_TO_REG]], 33, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
liveins: $w0
%reg:gpr(s32) = COPY $w0
%zext:gpr(s64) = G_ZEXT %reg(s32)
%bit:gpr(s64) = G_CONSTANT i64 8589934592
%zero:gpr(s64) = G_CONSTANT i64 0
%and:gpr(s64) = G_AND %zext, %bit
%cmp:gpr(s32) = G_ICMP intpred(eq), %and(s64), %zero
; We should widen using a SUBREG_TO_REG here, because we need a TBZX to get
; bit 33. The subregister should be sub_32.
%trunc:gpr(s1) = G_TRUNC %cmp(s32)
G_BRCOND %trunc(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
...
---
name: widen_s16_to_s64
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: widen_s16_to_s64
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: %reg:gpr32 = IMPLICIT_DEF
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
; CHECK: TBZX [[SUBREG_TO_REG]], 33, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
%reg:gpr(s16) = G_IMPLICIT_DEF
%zext:gpr(s64) = G_ZEXT %reg(s16)
%bit:gpr(s64) = G_CONSTANT i64 8589934592
%zero:gpr(s64) = G_CONSTANT i64 0
%and:gpr(s64) = G_AND %zext, %bit
%cmp:gpr(s32) = G_ICMP intpred(eq), %and(s64), %zero
; We should widen using a SUBREG_TO_REG here, because we need a TBZX to get
; bit 33. The subregister should be sub_32, because s16 will end up on a
; GPR32.
%trunc:gpr(s1) = G_TRUNC %cmp(s32)
G_BRCOND %trunc(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR
...
---
name: narrow_s64_to_s32
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: narrow_s64_to_s32
; CHECK: bb.0:
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $x0
; CHECK: %wide:gpr64 = COPY $x0
; CHECK: %trunc:gpr32 = COPY %wide.sub_32
; CHECK: TBNZW %trunc, 0, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
bb.0:
successors: %bb.0, %bb.1
liveins: $x0
%wide:gpr(s64) = COPY $x0
; We should narrow using a subregister copy here.
%trunc:gpr(s1) = G_TRUNC %wide(s64)
G_BRCOND %trunc(s1), %bb.1
G_BR %bb.0
bb.1:
RET_ReallyLR