1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[ARM GlobalISel] Support G_CTLZ and G_CTLZ_ZERO_UNDEF

We can now select CLZ via the TableGen'erated code, so support G_CTLZ
and G_CTLZ_ZERO_UNDEF throughout the pipeline for types <= s32.

Legalizer:
If the CLZ instruction is available, use it for both G_CTLZ and
G_CTLZ_ZERO_UNDEF. Otherwise, use a libcall for G_CTLZ_ZERO_UNDEF and
lower G_CTLZ in terms of it.

In order to achieve this we need to add support to the LegalizerHelper
for the legalization of G_CTLZ_ZERO_UNDEF for s32 as a libcall (__clzsi2).

We also need to allow lowering of G_CTLZ in terms of G_CTLZ_ZERO_UNDEF
if that is supported as a libcall, as opposed to just if it is Legal or
Custom. Due to a minor refactoring of the helper function in charge of
this, we will also allow the same behaviour for G_CTTZ and G_CTPOP.
This is not going to be a problem in practice since we don't yet have
support for treating G_CTTZ and G_CTPOP as libcalls (not even in
DAGISel).

Reg bank select:
Map G_CTLZ to GPR. G_CTLZ_ZERO_UNDEF should not make it to this point.

Instruction select:
Nothing to do.

llvm-svn: 347545
This commit is contained in:
Diana Picus 2018-11-26 11:07:02 +00:00
parent ac84bf3922
commit 7511d6e3fd
6 changed files with 255 additions and 9 deletions

View File

@ -93,6 +93,9 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
case TargetOpcode::G_UREM: case TargetOpcode::G_UREM:
assert(Size == 32 && "Unsupported size"); assert(Size == 32 && "Unsupported size");
return RTLIB::UREM_I32; return RTLIB::UREM_I32;
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
assert(Size == 32 && "Unsupported size");
return RTLIB::CTLZ_I32;
case TargetOpcode::G_FADD: case TargetOpcode::G_FADD:
assert((Size == 32 || Size == 64) && "Unsupported size"); assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
@ -189,7 +192,8 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_SDIV: case TargetOpcode::G_SDIV:
case TargetOpcode::G_UDIV: case TargetOpcode::G_UDIV:
case TargetOpcode::G_SREM: case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM: { case TargetOpcode::G_UREM:
case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
Type *HLTy = Type::getInt32Ty(Ctx); Type *HLTy = Type::getInt32Ty(Ctx);
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
if (Status != Legalized) if (Status != Legalized)
@ -1108,9 +1112,9 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
unsigned Opc = MI.getOpcode(); unsigned Opc = MI.getOpcode();
auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
auto isLegalOrCustom = [this](const LegalityQuery &Q) { auto isSupported = [this](const LegalityQuery &Q) {
auto QAction = LI.getAction(Q).Action; auto QAction = LI.getAction(Q).Action;
return QAction == Legal || QAction == Custom; return QAction == Legal || QAction == Libcall || QAction == Custom;
}; };
switch (Opc) { switch (Opc) {
default: default:
@ -1124,9 +1128,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case TargetOpcode::G_CTLZ: { case TargetOpcode::G_CTLZ: {
unsigned SrcReg = MI.getOperand(1).getReg(); unsigned SrcReg = MI.getOperand(1).getReg();
unsigned Len = Ty.getSizeInBits(); unsigned Len = Ty.getSizeInBits();
if (isLegalOrCustom({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) { if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
// If CTLZ_ZERO_UNDEF is legal or custom, emit that and a select with // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
// zero.
auto MIBCtlzZU = auto MIBCtlzZU =
MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, Ty, SrcReg); MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, Ty, SrcReg);
auto MIBZero = MIRBuilder.buildConstant(Ty, 0); auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
@ -1173,7 +1176,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case TargetOpcode::G_CTTZ: { case TargetOpcode::G_CTTZ: {
unsigned SrcReg = MI.getOperand(1).getReg(); unsigned SrcReg = MI.getOperand(1).getReg();
unsigned Len = Ty.getSizeInBits(); unsigned Len = Ty.getSizeInBits();
if (isLegalOrCustom({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) { if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
// If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
// zero. // zero.
auto MIBCttzZU = auto MIBCttzZU =
@ -1197,8 +1200,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
auto MIBTmp = MIRBuilder.buildInstr( auto MIBTmp = MIRBuilder.buildInstr(
TargetOpcode::G_AND, Ty, MIBNot, TargetOpcode::G_AND, Ty, MIBNot,
MIRBuilder.buildInstr(TargetOpcode::G_ADD, Ty, SrcReg, MIBCstNeg1)); MIRBuilder.buildInstr(TargetOpcode::G_ADD, Ty, SrcReg, MIBCstNeg1));
if (!isLegalOrCustom({TargetOpcode::G_CTPOP, {Ty}}) && if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
isLegalOrCustom({TargetOpcode::G_CTLZ, {Ty}})) { isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
MIRBuilder.buildInstr( MIRBuilder.buildInstr(
TargetOpcode::G_SUB, MI.getOperand(0).getReg(), TargetOpcode::G_SUB, MI.getOperand(0).getReg(),

View File

@ -109,6 +109,22 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL}).legalFor({s32}); getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL}).legalFor({s32});
if (ST.hasV5TOps()) {
getActionDefinitionsBuilder(G_CTLZ)
.legalFor({s32})
.clampScalar(0, s32, s32);
getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
.lowerFor({s32})
.clampScalar(0, s32, s32);
} else {
getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
.libcallFor({s32})
.clampScalar(0, s32, s32);
getActionDefinitionsBuilder(G_CTLZ)
.lowerFor({s32})
.clampScalar(0, s32, s32);
}
getActionDefinitionsBuilder(G_GEP).legalFor({{p0, s32}}); getActionDefinitionsBuilder(G_GEP).legalFor({{p0, s32}});
getActionDefinitionsBuilder(G_SELECT).legalForCartesianProduct({s32, p0}, getActionDefinitionsBuilder(G_SELECT).legalForCartesianProduct({s32, p0},

View File

@ -234,6 +234,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_GEP: case G_GEP:
case G_INTTOPTR: case G_INTTOPTR:
case G_PTRTOINT: case G_PTRTOINT:
case G_CTLZ:
// FIXME: We're abusing the fact that everything lives in a GPR for now; in // FIXME: We're abusing the fact that everything lives in a GPR for now; in
// the real world we would use different mappings. // the real world we would use different mappings.
OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];

View File

@ -0,0 +1,177 @@
# Legalizer test for G_CTLZ and G_CTLZ_ZERO_UNDEF on ARM. The legalizer pass is
# run twice over this file, once with the v5t feature enabled and once with it
# disabled. Both runs share a common check prefix and each one adds a prefix of
# its own, so expectations can be shared or specialised per run.
# RUN: llc -mtriple arm-linux-gnueabi -mattr=+v5t -run-pass=legalizer %s -o - | FileCheck %s -check-prefixes=CHECK,CLZ
# RUN: llc -mtriple arm-linux-gnueabi -mattr=-v5t -run-pass=legalizer %s -o - | FileCheck %s -check-prefixes=CHECK,LIBCALLS
--- |
define void @test_ctlz_s32() { ret void }
define void @test_ctlz_zero_undef_s32() { ret void }
; same as above but with extensions
define void @test_ctlz_s16() { ret void }
define void @test_ctlz_zero_undef_s8() { ret void }
...
---
# s32 G_CTLZ.
# With +v5t the instruction is expected to survive legalization unchanged.
# With -v5t the expectations are a call to __clzsi2, followed by a compare of
# the input against zero and a select that yields 32 when the input is zero and
# the value returned by the call otherwise.
name: test_ctlz_s32
# CHECK-LABEL: name: test_ctlz_s32
legalized: false
# CHECK: legalized: true
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.0:
liveins: $r0
; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $r0
%0(s32) = COPY $r0
; CLZ: [[R:%[0-9]+]]:_(s32) = G_CTLZ [[X]]
; LIBCALLS-NOT: G_CTLZ
; LIBCALLS: ADJCALLSTACKDOWN
; LIBCALLS: $r0 = COPY [[X]]
; LIBCALLS: BL &__clzsi2, {{.*}}, implicit $r0, implicit-def $r0
; LIBCALLS: [[COUNT:%[0-9]+]]:_(s32) = COPY $r0
; LIBCALLS: ADJCALLSTACKUP
; LIBCALLS-NOT: G_CTLZ
; LIBCALLS: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; LIBCALLS: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; LIBCALLS: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[X]](s32), [[ZERO]]
; LIBCALLS: [[R:%[0-9]+]]:_(s32) = G_SELECT [[CMP]](s1), [[BITS]], [[COUNT]]
; LIBCALLS-NOT: G_CTLZ
%1(s32) = G_CTLZ %0
; CHECK: $r0 = COPY [[R]]
$r0 = COPY %1(s32)
BX_RET 14, $noreg, implicit $r0
...
---
# s32 G_CTLZ_ZERO_UNDEF.
# With +v5t a plain G_CTLZ of the input is expected in its place.
# With -v5t the result is expected to be the value returned by a call to
# __clzsi2; unlike the G_CTLZ case, no compare/select sequence is expected
# after the call.
name: test_ctlz_zero_undef_s32
# CHECK-LABEL: name: test_ctlz_zero_undef_s32
legalized: false
# CHECK: legalized: true
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.0:
liveins: $r0
; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $r0
%0(s32) = COPY $r0
; CLZ: [[R:%[0-9]+]]:_(s32) = G_CTLZ [[X]]
; LIBCALLS-NOT: G_CTLZ
; LIBCALLS: ADJCALLSTACKDOWN
; LIBCALLS: $r0 = COPY [[X]]
; LIBCALLS: BL &__clzsi2, {{.*}}, implicit $r0, implicit-def $r0
; LIBCALLS: [[R:%[0-9]+]]:_(s32) = COPY $r0
; LIBCALLS: ADJCALLSTACKUP
; LIBCALLS-NOT: G_CTLZ
%1(s32) = G_CTLZ_ZERO_UNDEF %0
; CHECK: $r0 = COPY [[R]]
$r0 = COPY %1(s32)
BX_RET 14, $noreg, implicit $r0
...
---
# s16 G_CTLZ (the s32 input is truncated to s16 first).
# The input is expected to be masked with 65535 and counted as a 32-bit value:
# directly through G_CTLZ with +v5t, or through a __clzsi2 call plus a
# zero-input select with -v5t. In both runs 16 is then subtracted from the
# 32-bit count. The trailing G_SEXT back to s32 is expected to turn into a
# G_SHL / G_ASHR pair by 16.
name: test_ctlz_s16
# CHECK-LABEL: name: test_ctlz_s16
legalized: false
# CHECK: legalized: true
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
- { id: 3, class: _ }
body: |
bb.0:
liveins: $r0
; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $r0
; CHECK: [[BITMASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK: [[XAGAIN:%[0-9]+]]:_(s32) = COPY [[X]]
; CHECK: [[X32:%[0-9]+]]:_(s32) = G_AND [[XAGAIN]], [[BITMASK]]
%0(s32) = COPY $r0
%1(s16) = G_TRUNC %0(s32)
; Check that the operation is performed for 32 bits
; CLZ: [[COUNT:%[0-9]+]]:_(s32) = G_CTLZ [[X32]]
; LIBCALLS-NOT: G_CTLZ
; LIBCALLS: ADJCALLSTACKDOWN
; LIBCALLS: $r0 = COPY [[X32]]
; LIBCALLS: BL &__clzsi2, {{.*}}, implicit $r0, implicit-def $r0
; LIBCALLS: [[UNDEFCOUNT:%[0-9]+]]:_(s32) = COPY $r0
; LIBCALLS: ADJCALLSTACKUP
; LIBCALLS-NOT: G_CTLZ
; LIBCALLS: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; LIBCALLS: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; LIBCALLS: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), {{%[0-9]+}}(s32), [[ZERO]]
; LIBCALLS: [[COUNT:%[0-9]+]]:_(s32) = G_SELECT [[CMP]](s1), [[BITS]], [[UNDEFCOUNT]]
; LIBCALLS-NOT: G_CTLZ
; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[R32:%[0-9]+]]:_(s32) = G_SUB [[COUNT]], [[BITDIFF]]
%2(s16) = G_CTLZ %1
; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK: [[RAGAIN:%[0-9]+]]:_(s32) = COPY [[R32]]
; CHECK: [[SHIFTEDR:%[0-9]+]]:_(s32) = G_SHL [[RAGAIN]], [[BITDIFF]]
; CHECK: [[R:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDR]], [[BITDIFF]]
; CHECK: $r0 = COPY [[R]]
%3(s32) = G_SEXT %2(s16)
$r0 = COPY %3(s32)
BX_RET 14, $noreg, implicit $r0
...
---
# s8 G_CTLZ_ZERO_UNDEF (the s32 input is truncated to s8 first).
# The input is expected to be masked with 255 and counted as a 32-bit value:
# through a G_CTLZ (and no G_CTLZ_ZERO_UNDEF) with +v5t, or through a __clzsi2
# call with -v5t. In both runs 24 is then subtracted from the 32-bit count. The
# trailing G_SEXT back to s32 is expected to turn into a G_SHL / G_ASHR pair
# by 24.
name: test_ctlz_zero_undef_s8
# CHECK-LABEL: name: test_ctlz_zero_undef_s8
legalized: false
# CHECK: legalized: true
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
- { id: 2, class: _ }
- { id: 3, class: _ }
body: |
bb.0:
liveins: $r0
; CHECK: [[X:%[0-9]+]]:_(s32) = COPY $r0
; CHECK: [[BITMASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK: [[XAGAIN:%[0-9]+]]:_(s32) = COPY [[X]]
; CHECK: [[X32:%[0-9]+]]:_(s32) = G_AND [[XAGAIN]], [[BITMASK]]
%0(s32) = COPY $r0
%1(s8) = G_TRUNC %0(s32)
; Check that the operation is performed for 32 bits
; CLZ: [[COUNT:%[0-9]+]]:_(s32) = G_CTLZ
; CLZ-NOT: G_CTLZ_ZERO_UNDEF
; LIBCALLS-NOT: G_CTLZ
; LIBCALLS: ADJCALLSTACKDOWN
; LIBCALLS: $r0 = COPY [[X32]]
; LIBCALLS: BL &__clzsi2, {{.*}}, implicit $r0, implicit-def $r0
; LIBCALLS: [[COUNT:%[0-9]+]]:_(s32) = COPY $r0
; LIBCALLS: ADJCALLSTACKUP
; LIBCALLS-NOT: G_CTLZ
; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK: [[R32:%[0-9]+]]:_(s32) = G_SUB [[COUNT]], [[BITDIFF]]
%2(s8) = G_CTLZ_ZERO_UNDEF %1
; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK: [[RAGAIN:%[0-9]+]]:_(s32) = COPY [[R32]]
; CHECK: [[SHIFTEDR:%[0-9]+]]:_(s32) = G_SHL [[RAGAIN]], [[BITDIFF]]
; CHECK: [[R:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDR]], [[BITDIFF]]
; CHECK: $r0 = COPY [[R]]
%3(s32) = G_SEXT %2(s8)
$r0 = COPY %3(s32)
BX_RET 14, $noreg, implicit $r0
...

View File

@ -27,6 +27,8 @@
define void @test_inttoptr_s32() { ret void } define void @test_inttoptr_s32() { ret void }
define void @test_ptrtoint_s32() { ret void } define void @test_ptrtoint_s32() { ret void }
define void @test_ctlz_s32() #3 { ret void }
@a_global = global float 1.0 @a_global = global float 1.0
define void @test_globals() { ret void } define void @test_globals() { ret void }
@ -83,6 +85,7 @@
attributes #0 = { "target-features"="+vfp2"} attributes #0 = { "target-features"="+vfp2"}
attributes #1 = { "target-features"="+hwdiv-arm" } attributes #1 = { "target-features"="+hwdiv-arm" }
attributes #2 = { "target-features"="+vfp4"} attributes #2 = { "target-features"="+vfp4"}
attributes #3 = { "target-features"="+v5t"}
... ...
--- ---
name: test_add_s32 name: test_add_s32
@ -561,6 +564,25 @@ body: |
BX_RET 14, $noreg, implicit $r0 BX_RET 14, $noreg, implicit $r0
... ...
--- ---
name: test_ctlz_s32
# CHECK-LABEL: name: test_ctlz_s32
legalized: true
regBankSelected: false
selected: false
# CHECK: registers:
# CHECK: - { id: 0, class: gprb, preferred-register: '' }
# CHECK: - { id: 1, class: gprb, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
body: |
bb.0:
%0(s32) = COPY $r0
%1(s32) = G_CTLZ %0(s32)
$r0 = COPY %1(s32)
BX_RET 14, $noreg, implicit $r0
...
---
name: test_globals name: test_globals
# CHECK-LABEL: name: test_globals # CHECK-LABEL: name: test_globals
legalized: true legalized: true

View File

@ -148,6 +148,33 @@ TEST_F(LegalizerHelperTest, LowerBitCountingCTLZ0) {
ASSERT_TRUE(CheckMachineFunction(*MF, CheckStr)); ASSERT_TRUE(CheckMachineFunction(*MF, CheckStr));
} }
// Lowering G_CTLZ must go through G_CTLZ_ZERO_UNDEF when the target only
// offers the latter as a libcall.
TEST_F(LegalizerHelperTest, LowerBitCountingCTLZLibcall) {
  // Nothing to test when no target machine could be created.
  if (!TM)
    return;

  // Legalization rules under test: s64 G_CTLZ_ZERO_UNDEF is a libcall.
  DefineLegalizerInfo(
      A, { getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).libcallFor({s64}); });

  // Create the s64 G_CTLZ that is going to be lowered.
  const LLT S64 = LLT::scalar(64);
  auto CtlzMI = B.buildInstr(TargetOpcode::G_CTLZ, S64, Copies[0]);

  // Run the lowering and make sure it reports success.
  AInfo LegalInfo(MF->getSubtarget());
  LegalizerHelper Helper(*MF, LegalInfo);
  const auto LowerResult = Helper.lower(*CtlzMI, 0, S64);
  ASSERT_TRUE(LowerResult == LegalizerHelper::LegalizeResult::Legalized);

  // Expected output: the zero-undef count, plus a select that substitutes the
  // bit width (64) when the input compares equal to zero.
  auto ExpectedMIR = R"(
CHECK: [[CZU:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF %0
CHECK: [[ZERO:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
CHECK: [[THIRTY2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), %0:_(s64), [[ZERO]]
CHECK: [[SEL:%[0-9]+]]:_(s64) = G_SELECT [[CMP]]:_(s1), [[THIRTY2]]:_, [[CZU]]
)";

  ASSERT_TRUE(CheckMachineFunction(*MF, ExpectedMIR));
}
// CTLZ expansion // CTLZ expansion
TEST_F(LegalizerHelperTest, LowerBitCountingCTLZ1) { TEST_F(LegalizerHelperTest, LowerBitCountingCTLZ1) {
if (!TM) if (!TM)