mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
[NVPTX] Generate a more optimal sequence for select of i1
Instead of creating a pattern like "(p && a) || ((!p) && b)", just extend the i1 operands to i32 and perform the selp on them. Fixes PR22246. llvm-svn: 227123
This commit is contained in:
parent
15971afe2e
commit
a0a83cc9da
@ -259,6 +259,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM)
|
||||
setOperationAction(ISD::CTPOP, MVT::i32, Legal);
|
||||
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
|
||||
|
||||
// PTX does not directly support SELP of i1, so promote to i32 first
|
||||
setOperationAction(ISD::SELECT, MVT::i1, Custom);
|
||||
|
||||
// We have some custom DAG combine patterns for these nodes
|
||||
setTargetDAGCombine(ISD::ADD);
|
||||
setTargetDAGCombine(ISD::AND);
|
||||
@ -1803,11 +1806,29 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::SRA_PARTS:
|
||||
case ISD::SRL_PARTS:
|
||||
return LowerShiftRightParts(Op, DAG);
|
||||
case ISD::SELECT:
|
||||
return LowerSelect(Op, DAG);
|
||||
default:
|
||||
llvm_unreachable("Custom lowering not defined for operation");
|
||||
}
|
||||
}
|
||||
|
||||
/// Custom lowering for an i1-typed ISD::SELECT.
///
/// The two value operands are any-extended to i32, an i32 SELECT is built on
/// the original condition operand, and the result is truncated back to i1.
///
/// \param Op  the SELECT node to lower; its value type must be MVT::i1.
/// \param DAG the SelectionDAG in which the replacement nodes are created.
/// \returns the TRUNCATE node that yields the i1 result.
SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");

  SDLoc dl(Op.getNode());
  SDValue Cond = Op->getOperand(0);

  // Any-extend is sufficient: the final truncate to i1 discards the high bits.
  SDValue WideLHS =
      DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op->getOperand(1));
  SDValue WideRHS =
      DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op->getOperand(2));

  SDValue WideSelect =
      DAG.getNode(ISD::SELECT, dl, MVT::i32, Cond, WideLHS, WideRHS);
  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, WideSelect);
}
|
||||
|
||||
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (Op.getValueType() == MVT::i1)
|
||||
return LowerLOADi1(Op, DAG);
|
||||
|
@ -529,6 +529,8 @@ private:
|
||||
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
|
||||
SelectionDAG &DAG) const override;
|
||||
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
||||
|
@ -1356,11 +1356,6 @@ defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
|
||||
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
|
||||
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
|
||||
|
||||
// Special select for predicate operands.
// Matches an i1 select whose condition and both values are in Int1Regs and
// emits it as (p AND a) OR ((NOT p) AND b), built from ANDb1rr, ORb1rr and
// NOT1.
|
||||
def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
|
||||
(ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
|
||||
(ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
|
||||
|
||||
//
|
||||
// Funnel shift in clamp mode
|
||||
//
|
||||
|
14
test/CodeGen/NVPTX/bug22246.ll
Normal file
14
test/CodeGen/NVPTX/bug22246.ll
Normal file
@ -0,0 +1,14 @@
|
||||
; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
; CHECK-LABEL: _Z3foobbbPb
|
||||
; Selects between two i1 arguments on an i1 condition, widens the result to i8
; and stores it. The CHECK line expects a selp.b32 in the generated PTX.
define void @_Z3foobbbPb(i1 zeroext %p1, i1 zeroext %p2, i1 zeroext %p3, i8* nocapture %output) {
entry:
; CHECK: selp.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}}
  %sel = select i1 %p1, i1 %p2, i1 %p3
  %sel.ext = zext i1 %sel to i8
  store i8 %sel.ext, i8* %output, align 1
  ret void
}
|
Loading…
Reference in New Issue
Block a user