1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00

SelectionDAG: Lower some range metadata to AssertZext

If a range has a lower bound of 0, add an AssertZext from the
nearest floor power of two.

This allows operations with some workitem intrinsics with known
maximum ranges to use fast 24-bit multiplies.

llvm-svn: 260109
This commit is contained in:
Matt Arsenault 2016-02-08 16:28:19 +00:00
parent c705af63bb
commit 34d57039a9
4 changed files with 135 additions and 3 deletions

View File

@ -3721,7 +3721,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
}
} else
Result = lowerRangeToAssertZExt(DAG, I, Result);
setValue(&I, Result);
}
@ -5419,8 +5420,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
.setTailCall(isTailCall);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode())
setValue(CS.getInstruction(), Result.first);
if (Result.first.getNode()) {
const Instruction *Inst = CS.getInstruction();
Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first);
setValue(Inst, Result.first);
}
}
/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
@ -6716,6 +6720,39 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
DAG.getSrcValue(I.getArgOperand(1))));
}
/// Lower !range metadata of the form [0, N) attached to \p I into an
/// ISD::AssertZext wrapped around \p Op.
///
/// The asserted width is the number of bits needed to represent N - 1, the
/// largest value the range permits. \p Op is returned unchanged when \p I has
/// no range metadata, when the metadata lists more than one interval, when the
/// lower bound is not zero, or when the range does not prove any high bit to
/// be zero.
///
/// \return The asserted value; if \p Op's node produces several values, a
/// merge of the asserted first value with the node's remaining values.
SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
                                                    const Instruction &I,
                                                    SDValue Op) {
  const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
  if (!Range)
    return Op;

  // !range may carry several [Lo, Hi) pairs. Reading only the first pair would
  // ignore the larger values permitted by the later ones, so only handle the
  // single-interval form.
  if (Range->getNumOperands() != 2)
    return Op;

  Constant *Lo = cast<ConstantAsMetadata>(Range->getOperand(0))->getValue();
  if (!Lo->isNullValue())
    return Op;

  Constant *Hi = cast<ConstantAsMetadata>(Range->getOperand(1))->getValue();

  // The upper bound is exclusive, so the largest possible value is Hi - 1.
  // Size the assertion from that value: taking the floor log2 of Hi itself is
  // only right when Hi is a power of two (e.g. [0, 1023) needs 10 bits, not 9).
  const APInt MaxVal = cast<ConstantInt>(Hi)->getValue() - 1;
  unsigned Bits = MaxVal.getActiveBits();

  // A [0, 1) range needs no bits at all, but a zero-width integer type is not
  // valid; assert a single bit instead.
  if (Bits == 0)
    Bits = 1;

  // Nothing is known about the high bits if the maximum already needs the
  // whole width.
  if (Bits >= MaxVal.getBitWidth())
    return Op;

  EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);

  SDLoc SL = getCurSDLoc();

  SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(),
                             Op, DAG.getValueType(SmallVT));
  unsigned NumVals = Op.getNode()->getNumValues();
  if (NumVals == 1)
    return ZExt;

  // Keep the node's other results (e.g. a chain) behind the asserted value.
  SmallVector<SDValue, 4> Ops;

  Ops.push_back(ZExt);
  for (unsigned Idx = 1; Idx != NumVals; ++Idx)
    Ops.push_back(Op.getValue(Idx));

  return DAG.getMergeValues(Ops, SL);
}
/// \brief Lower an argument list according to the target calling convention.
///
/// \return A tuple of <return-value, token-chain>

View File

@ -708,6 +708,11 @@ public:
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
const BasicBlock *EHPadBB = nullptr);
/// Lower !range metadata on \p I to an ISD::AssertZext wrapped around \p Op.
///
/// Only ranges whose lower bound is 0 are handled; the asserted integer width
/// is derived from the range's upper bound. \p Op is returned unchanged when
/// \p I carries no range metadata or the lower bound is non-zero. When \p Op's
/// node produces more than one value, the remaining values are merged back in
/// behind the asserted one.
SDValue lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I,
SDValue Op);
std::pair<SDValue, SDValue> lowerCallOperands(
ImmutableCallSite CS,
unsigned ArgIdx,

View File

@ -0,0 +1,44 @@
; RUN: llc -march=aarch64 -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
; The !range !0 metadata on the call bounds %id to [0, 1024), so masking with
; 1023 is redundant and the 'and' can be eliminated.
; CHECK-LABEL: {{^}}test_call_known_max_range:
; CHECK: bl foo
; CHECK-NOT: and
; CHECK: ret
define i32 @test_call_known_max_range() #0 {
entry:
%id = tail call i32 @foo(), !range !0
%and = and i32 %id, 1023
ret i32 %and
}
; The mask 511 is one bit narrower than the [0, 1024) range given by !0, so
; the 'and' is expected to remain in the output.
; CHECK-LABEL: {{^}}test_call_known_trunc_1_bit_range:
; CHECK: bl foo
; CHECK: and w{{[0-9]+}}, w0, #0x1ff
; CHECK: ret
define i32 @test_call_known_trunc_1_bit_range() #0 {
entry:
%id = tail call i32 @foo(), !range !0
%and = and i32 %id, 511
ret i32 %and
}
; Uses !1, whose upper bound (1023) is one less than a power of two. The mask
; 255 is narrower than that range, so the 'and' is expected to remain.
; CHECK-LABEL: {{^}}test_call_known_max_range_m1:
; CHECK: bl foo
; CHECK: and w{{[0-9]+}}, w0, #0xff
; CHECK: ret
define i32 @test_call_known_max_range_m1() #0 {
entry:
%id = tail call i32 @foo(), !range !1
%and = and i32 %id, 255
ret i32 %and
}
declare i32 @foo()
attributes #0 = { norecurse nounwind }
attributes #1 = { nounwind readnone }
!0 = !{i32 0, i32 1024}
!1 = !{i32 0, i32 1023}

View File

@ -0,0 +1,46 @@
; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-unknown < %s | FileCheck %s
; The !range !0 metadata bounds the workitem id to [0, 1024), so the 'and'
; with 1023 can be eliminated: the checks expect v0 to reach the store with no
; earlier mention of v0 in the output.
; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range:
; CHECK-NOT: v0
; CHECK: {{flat|buffer}}_store_dword v0
define void @test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
%and = and i32 %id, 1023
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
}
; The mask 511 is one bit narrower than the [0, 1024) range given by !0, so a
; v_and_b32 with 0x1ff is expected and its result is what gets stored.
; CHECK-LABEL: {{^}}test_workitem_id_x_known_trunc_1_bit_range:
; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x1ff, v0
; CHECK: {{flat|buffer}}_store_dword [[MASKED]]
define void @test_workitem_id_x_known_trunc_1_bit_range(i32 addrspace(1)* nocapture %out) #0 {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
%and = and i32 %id, 511
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
}
; Uses !1, whose upper bound (1023) is one less than a power of two. The mask
; 255 is narrower than that range, so a v_and_b32 with 0xff is expected (with
; no mention of v0 before it) and its result is what gets stored.
; CHECK-LABEL: {{^}}test_workitem_id_x_known_max_range_m1:
; CHECK-NOT: v0
; CHECK: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xff, v0
; CHECK: {{flat|buffer}}_store_dword [[MASKED]]
define void @test_workitem_id_x_known_max_range_m1(i32 addrspace(1)* nocapture %out) #0 {
entry:
%id = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !1
%and = and i32 %id, 255
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { norecurse nounwind }
attributes #1 = { nounwind readnone }
!0 = !{i32 0, i32 1024}
!1 = !{i32 0, i32 1023}