mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[AArch64][GlobalISel] Properly implement widening for TB(N)Z
When we have to widen to a 64-bit register, we have to emit a SUBREG_TO_REG. Add a general-purpose widening helpe which emits the correct SUBREG_TO_REG instruction based off of a desired size and add a testcase. Also remove some asserts which are technically incorrect in `emitTestBit`. - p0 doesn't count as a scalar type, so we need to check `!Ty.isVector()` instead - Whenever we have a s1, the Size/Bit checks are too conservative, so just remove them Replace these asserts with less conservative ones where applicable. Differential Revision: https://reviews.llvm.org/D74427
This commit is contained in:
parent
ed8bd8f4b1
commit
7e17214e5b
@ -273,6 +273,8 @@ private:
|
||||
/// new copy.
|
||||
Register narrowExtendRegIfNeeded(Register ExtReg,
|
||||
MachineIRBuilder &MIB) const;
|
||||
Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
|
||||
MachineIRBuilder &MIB) const;
|
||||
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
|
||||
|
||||
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
|
||||
@ -1124,26 +1126,25 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
|
||||
MachineInstr *AArch64InstructionSelector::emitTestBit(
|
||||
Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
|
||||
MachineIRBuilder &MIB) const {
|
||||
MachineRegisterInfo &MRI = *MIB.getMRI();
|
||||
#ifndef NDEBUG
|
||||
assert(TestReg.isValid());
|
||||
assert(ProduceNonFlagSettingCondBr &&
|
||||
"Cannot emit TB(N)Z with speculation tracking!");
|
||||
assert(TestReg.isValid());
|
||||
LLT Ty = MRI.getType(TestReg);
|
||||
unsigned Size = Ty.getSizeInBits();
|
||||
assert(Bit < Size &&
|
||||
"Bit to test must be smaler than the size of a test register!");
|
||||
assert(Ty.isScalar() && "Expected a scalar!");
|
||||
assert(Size >= 32 && "Expected at least a 32-bit register!");
|
||||
#endif
|
||||
MachineRegisterInfo &MRI = *MIB.getMRI();
|
||||
|
||||
// Attempt to optimize the test bit by walking over instructions.
|
||||
TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
|
||||
bool UseWReg = Bit < 32;
|
||||
LLT Ty = MRI.getType(TestReg);
|
||||
unsigned Size = Ty.getSizeInBits();
|
||||
assert(!Ty.isVector() && "Expected a scalar!");
|
||||
assert(Bit < 64 && "Bit is too large!");
|
||||
|
||||
// When the test register is a 64-bit register, we have to narrow to make
|
||||
// TBNZW work.
|
||||
if (UseWReg)
|
||||
bool UseWReg = Bit < 32;
|
||||
unsigned NecessarySize = UseWReg ? 32 : 64;
|
||||
if (Size < NecessarySize)
|
||||
TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
|
||||
else if (Size > NecessarySize)
|
||||
TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
|
||||
|
||||
static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
|
||||
@ -5154,6 +5155,52 @@ Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
|
||||
return Copy.getReg(0);
|
||||
}
|
||||
|
||||
Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
|
||||
Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
|
||||
assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");
|
||||
MachineRegisterInfo &MRI = *MIB.getMRI();
|
||||
unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
|
||||
assert(WideSize >= NarrowSize &&
|
||||
"WideSize cannot be smaller than NarrowSize!");
|
||||
|
||||
// If the sizes match, just return the register.
|
||||
//
|
||||
// If NarrowSize is an s1, then we can select it to any size, so we'll treat
|
||||
// it as a don't care.
|
||||
if (NarrowSize == WideSize || NarrowSize == 1)
|
||||
return Reg;
|
||||
|
||||
// Now check the register classes.
|
||||
const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
|
||||
const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize);
|
||||
const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize);
|
||||
assert(OrigRC && "Could not determine narrow RC?");
|
||||
assert(WideRC && "Could not determine wide RC?");
|
||||
|
||||
// If the sizes differ, but the register classes are the same, there is no
|
||||
// need to insert a SUBREG_TO_REG.
|
||||
//
|
||||
// For example, an s8 that's supposed to be a GPR will be selected to either
|
||||
// a GPR32 or a GPR64 register. Note that this assumes that the s8 will
|
||||
// always end up on a GPR32.
|
||||
if (OrigRC == WideRC)
|
||||
return Reg;
|
||||
|
||||
// We have two different register classes. Insert a SUBREG_TO_REG.
|
||||
unsigned SubReg = 0;
|
||||
getSubRegForClass(OrigRC, TRI, SubReg);
|
||||
assert(SubReg && "Couldn't determine subregister?");
|
||||
|
||||
// Build the SUBREG_TO_REG and return the new, widened register.
|
||||
auto SubRegToReg =
|
||||
MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
|
||||
.addImm(0)
|
||||
.addUse(Reg)
|
||||
.addImm(SubReg);
|
||||
constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
|
||||
return SubRegToReg.getReg(0);
|
||||
}
|
||||
|
||||
/// Select an "extended register" operand. This operand folds in an extend
|
||||
/// followed by an optional left shift.
|
||||
InstructionSelector::ComplexRendererFns
|
||||
|
@ -78,9 +78,8 @@ body: |
|
||||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: liveins: $h0
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
|
||||
; CHECK: %copy:gpr32all = COPY [[SUBREG_TO_REG]]
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %copy
|
||||
; CHECK: TBNZW [[COPY]], 3, %bb.1
|
||||
; CHECK: %copy:gpr32 = COPY [[SUBREG_TO_REG]]
|
||||
; CHECK: TBNZW %copy, 3, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
|
193
test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
Normal file
193
test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
Normal file
@ -0,0 +1,193 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
|
||||
#
|
||||
# Test widening and narrowing on test bit operations using subregister copies
|
||||
# or SUBREG_TO_REG.
|
||||
--- |
|
||||
@glob = external unnamed_addr global i1, align 4
|
||||
define void @s1_no_copy() { ret void }
|
||||
define void @s16_no_copy() { ret void }
|
||||
define void @p0_no_copy() { ret void }
|
||||
define void @widen_s32_to_s64() { ret void }
|
||||
define void @widen_s16_to_s64() { ret void }
|
||||
define void @narrow_s64_to_s32() { ret void }
|
||||
|
||||
...
|
||||
---
|
||||
name: s1_no_copy
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: s1_no_copy
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: %narrow:gpr32 = IMPLICIT_DEF
|
||||
; CHECK: TBNZW %narrow, 0, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
bb.0:
|
||||
successors: %bb.0, %bb.1
|
||||
%narrow:gpr(s1) = G_IMPLICIT_DEF
|
||||
|
||||
; There should be no copy here, because the s1 can be selected to a GPR32.
|
||||
G_BRCOND %narrow(s1), %bb.1
|
||||
G_BR %bb.0
|
||||
bb.1:
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
name: s16_no_copy
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: s16_no_copy
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: %narrow:gpr32 = IMPLICIT_DEF
|
||||
; CHECK: TBNZW %narrow, 0, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
bb.0:
|
||||
successors: %bb.0, %bb.1
|
||||
%narrow:gpr(s16) = G_IMPLICIT_DEF
|
||||
%trunc:gpr(s1) = G_TRUNC %narrow(s16)
|
||||
|
||||
; Look through the G_TRUNC to get the G_IMPLICIT_DEF. We don't need a
|
||||
; SUBREG_TO_REG here, because the s16 will end up on a 32-bit register.
|
||||
G_BRCOND %trunc(s1), %bb.1
|
||||
G_BR %bb.0
|
||||
bb.1:
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
name: p0_no_copy
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: p0_no_copy
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: %glob:gpr64common = MOVaddr target-flags(aarch64-page) @glob, target-flags(aarch64-pageoff, aarch64-nc) @glob
|
||||
; CHECK: %load:gpr32 = LDRBBui %glob, 0 :: (dereferenceable load 1 from @glob, align 4)
|
||||
; CHECK: TBNZW %load, 0, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
bb.0:
|
||||
successors: %bb.0, %bb.1
|
||||
%glob:gpr(p0) = G_GLOBAL_VALUE @glob
|
||||
%load:gpr(s8) = G_LOAD %glob(p0) :: (dereferenceable load 1 from @glob, align 4)
|
||||
%trunc:gpr(s1) = G_TRUNC %load(s8)
|
||||
|
||||
; Look through G_TRUNC to get the load. The load is into a s8, which will
|
||||
; be selected to a GPR32, so we don't need a copy.
|
||||
G_BRCOND %trunc(s1), %bb.1
|
||||
G_BR %bb.0
|
||||
bb.1:
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
name: widen_s32_to_s64
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: widen_s32_to_s64
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: %reg:gpr32all = COPY $w0
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
|
||||
; CHECK: TBZX [[SUBREG_TO_REG]], 33, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
bb.0:
|
||||
successors: %bb.0, %bb.1
|
||||
liveins: $w0
|
||||
%reg:gpr(s32) = COPY $w0
|
||||
%zext:gpr(s64) = G_ZEXT %reg(s32)
|
||||
%bit:gpr(s64) = G_CONSTANT i64 8589934592
|
||||
%zero:gpr(s64) = G_CONSTANT i64 0
|
||||
%and:gpr(s64) = G_AND %zext, %bit
|
||||
%cmp:gpr(s32) = G_ICMP intpred(eq), %and(s64), %zero
|
||||
|
||||
; We should widen using a SUBREG_TO_REG here, because we need a TBZX to get
|
||||
; bit 33. The subregister should be sub_32.
|
||||
%trunc:gpr(s1) = G_TRUNC %cmp(s32)
|
||||
G_BRCOND %trunc(s1), %bb.1
|
||||
G_BR %bb.0
|
||||
bb.1:
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
name: widen_s16_to_s64
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: widen_s16_to_s64
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: %reg:gpr32 = IMPLICIT_DEF
|
||||
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
|
||||
; CHECK: TBZX [[SUBREG_TO_REG]], 33, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
bb.0:
|
||||
successors: %bb.0, %bb.1
|
||||
%reg:gpr(s16) = G_IMPLICIT_DEF
|
||||
%zext:gpr(s64) = G_ZEXT %reg(s16)
|
||||
%bit:gpr(s64) = G_CONSTANT i64 8589934592
|
||||
%zero:gpr(s64) = G_CONSTANT i64 0
|
||||
%and:gpr(s64) = G_AND %zext, %bit
|
||||
%cmp:gpr(s32) = G_ICMP intpred(eq), %and(s64), %zero
|
||||
|
||||
; We should widen using a SUBREG_TO_REG here, because we need a TBZX to get
|
||||
; bit 33. The subregister should be sub_32, because s16 will end up on a
|
||||
; GPR32.
|
||||
%trunc:gpr(s1) = G_TRUNC %cmp(s32)
|
||||
G_BRCOND %trunc(s1), %bb.1
|
||||
G_BR %bb.0
|
||||
bb.1:
|
||||
RET_ReallyLR
|
||||
...
|
||||
---
|
||||
name: narrow_s64_to_s32
|
||||
alignment: 4
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; CHECK-LABEL: name: narrow_s64_to_s32
|
||||
; CHECK: bb.0:
|
||||
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: %wide:gpr64 = COPY $x0
|
||||
; CHECK: %trunc:gpr32 = COPY %wide.sub_32
|
||||
; CHECK: TBNZW %trunc, 0, %bb.1
|
||||
; CHECK: B %bb.0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: RET_ReallyLR
|
||||
bb.0:
|
||||
successors: %bb.0, %bb.1
|
||||
liveins: $x0
|
||||
%wide:gpr(s64) = COPY $x0
|
||||
|
||||
; We should narrow using a subregister copy here.
|
||||
%trunc:gpr(s1) = G_TRUNC %wide(s64)
|
||||
G_BRCOND %trunc(s1), %bb.1
|
||||
G_BR %bb.0
|
||||
bb.1:
|
||||
RET_ReallyLR
|
Loading…
Reference in New Issue
Block a user