mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
AMDGPU: Pattern match ffbh pattern to instruction.
The hardware instruction's output on 0 is -1 rather than 32. Eliminate a test and select to -1. This removes an extra instruction from the compatibility function with HSAIL's firstbit instruction. llvm-svn: 257352
This commit is contained in:
parent
6195badb41
commit
b88ff2e112
@ -282,7 +282,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
|
||||
setOperationAction(ISD::SMAX, MVT::i32, Legal);
|
||||
setOperationAction(ISD::UMAX, MVT::i32, Legal);
|
||||
|
||||
if (!Subtarget->hasFFBH())
|
||||
if (Subtarget->hasFFBH())
|
||||
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
|
||||
else
|
||||
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
|
||||
|
||||
if (!Subtarget->hasFFBL())
|
||||
@ -2170,9 +2172,11 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc SL(Op);
|
||||
SDValue Src = Op.getOperand(0);
|
||||
assert(Src.getValueType() == MVT::i64);
|
||||
|
||||
bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF;
|
||||
|
||||
if (ZeroUndef && Src.getValueType() == MVT::i32)
|
||||
return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Src);
|
||||
|
||||
SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
|
||||
|
||||
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
|
||||
@ -2507,6 +2511,79 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
|
||||
return DAG.getSExtOrTrunc(Mul, DL, VT);
|
||||
}
|
||||
|
||||
// Returns true when Val is a ConstantSDNode for which isAllOnesValue() holds
// (the -1 constant the ctlz select combine looks for). Any value that is not
// a ConstantSDNode yields false.
static bool isNegativeOne(SDValue Val) {
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val))
|
||||
return C->isAllOnesValue();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true if Opc is one of the two count-leading-zeros opcodes,
// ISD::CTLZ or ISD::CTLZ_ZERO_UNDEF.
static bool isCtlzOpc(unsigned Opc) {
|
||||
return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
|
||||
}
|
||||
|
||||
// The native instructions return -1 on 0 input. Optimize out a select that
|
||||
// produces -1 on 0.
|
||||
//
|
||||
// TODO: If zero is not undef, we could also do this if the output is compared
|
||||
// against the bitwidth.
|
||||
//
|
||||
// TODO: Should probably combine against FFBH_U32 instead of ctlz directly.
|
||||
//
// Cond is read as a three-operand compare (value, constant, condition code);
// the caller visible in this file, performSelectCombine, only passes a SETCC.
// LHS and RHS are the select's true and false values, and SL is the location
// attached to the new node. Returns an AMDGPUISD::FFBH_U32 node on the
// compared value when one of the two patterns below matches, and an empty
// SDValue otherwise.
SDValue AMDGPUTargetLowering::performCtlzCombine(SDLoc SL,
|
||||
SDValue Cond,
|
||||
SDValue LHS,
|
||||
SDValue RHS,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
// Only a compare whose second operand is the constant zero is of interest.
ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
|
||||
if (!CmpRhs || !CmpRhs->isNullValue())
|
||||
return SDValue();
|
||||
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
// Operand 2 of the compare carries the condition code; operand 0 is the
// value being tested against zero.
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
|
||||
SDValue CmpLHS = Cond.getOperand(0);
|
||||
|
||||
// select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
|
||||
// isCtlzOpc accepts ISD::CTLZ as well as ISD::CTLZ_ZERO_UNDEF, so either form
// of ctlz on the compared value matches. The result type is fixed to i32;
// performSelectCombine only calls this for i32 selects.
if (CCOpcode == ISD::SETEQ &&
|
||||
isCtlzOpc(RHS.getOpcode()) &&
|
||||
RHS.getOperand(0) == CmpLHS &&
|
||||
isNegativeOne(LHS)) {
|
||||
return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, CmpLHS);
|
||||
}
|
||||
|
||||
// select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
|
||||
// Mirror image of the case above: with the inverted condition the ctlz is the
// true value and the -1 constant is the false value.
if (CCOpcode == ISD::SETNE &&
|
||||
isCtlzOpc(LHS.getOpcode()) &&
|
||||
LHS.getOperand(0) == CmpLHS &&
|
||||
isNegativeOne(RHS)) {
|
||||
return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, CmpLHS);
|
||||
}
|
||||
|
||||
// Neither pattern matched: report that no combine was performed.
return SDValue();
|
||||
}
|
||||
|
||||
// Combine for select nodes; PerformDAGCombine forwards ISD::SELECT here.
// N is the select (operand 0 = condition, 1 = true value, 2 = false value).
// Returns the replacement produced by CombineFMinMaxLegacy (f32) or
// performCtlzCombine (i32), or an empty SDValue when the condition is not a
// SETCC or the value type is neither of those.
SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
SDValue Cond = N->getOperand(0);
|
||||
// Only selects whose condition is a SETCC are handled.
if (Cond.getOpcode() != ISD::SETCC)
|
||||
return SDValue();
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
// Compare operands and condition code, taken from the SETCC.
SDValue LHS = Cond.getOperand(0);
|
||||
SDValue RHS = Cond.getOperand(1);
|
||||
SDValue CC = Cond.getOperand(2);
|
||||
|
||||
SDValue True = N->getOperand(1);
|
||||
SDValue False = N->getOperand(2);
|
||||
|
||||
// f32 selects are handed to CombineFMinMaxLegacy, but only when the compare
// has no other user. Its result is returned as-is.
if (VT == MVT::f32 && Cond.hasOneUse())
|
||||
return CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
|
||||
|
||||
// There's no reason to not do this if the condition has other uses.
|
||||
if (VT == MVT::i32)
|
||||
return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
@ -2531,23 +2608,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
simplifyI24(N1, DCI);
|
||||
return SDValue();
|
||||
}
|
||||
case ISD::SELECT: {
|
||||
SDValue Cond = N->getOperand(0);
|
||||
if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
|
||||
EVT VT = N->getValueType(0);
|
||||
SDValue LHS = Cond.getOperand(0);
|
||||
SDValue RHS = Cond.getOperand(1);
|
||||
SDValue CC = Cond.getOperand(2);
|
||||
|
||||
SDValue True = N->getOperand(1);
|
||||
SDValue False = N->getOperand(2);
|
||||
|
||||
if (VT == MVT::f32)
|
||||
return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case ISD::SELECT:
|
||||
return performSelectCombine(N, DCI);
|
||||
case AMDGPUISD::BFE_I32:
|
||||
case AMDGPUISD::BFE_U32: {
|
||||
assert(!N->getValueType(0).isVector() &&
|
||||
@ -2759,6 +2821,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(BFE_I32)
|
||||
NODE_NAME_CASE(BFI)
|
||||
NODE_NAME_CASE(BFM)
|
||||
NODE_NAME_CASE(FFBH_U32)
|
||||
NODE_NAME_CASE(MUL_U24)
|
||||
NODE_NAME_CASE(MUL_I24)
|
||||
NODE_NAME_CASE(MAD_U24)
|
||||
|
@ -69,6 +69,9 @@ private:
|
||||
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue performCtlzCombine(SDLoc SL, SDValue Cond, SDValue LHS, SDValue RHS,
|
||||
DAGCombinerInfo &DCI) const;
|
||||
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
|
||||
protected:
|
||||
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
|
||||
@ -265,6 +268,7 @@ enum NodeType : unsigned {
|
||||
BFE_I32, // Extract range of bits with sign extension to 32-bits.
|
||||
BFI, // (src0 & src1) | (~src0 & src2)
|
||||
BFM, // Insert a range of bits into a 32-bit word.
|
||||
FFBH_U32, // ctlz with -1 if input is zero.
|
||||
MUL_U24,
|
||||
MUL_I24,
|
||||
MAD_U24,
|
||||
|
@ -191,6 +191,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
|
||||
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
|
||||
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
|
||||
|
||||
def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
|
||||
|
||||
// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored when
|
||||
// performing the multiply. The result is a 32-bit value.
|
||||
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
|
||||
|
@ -349,7 +349,7 @@ def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
|
||||
def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>;
|
||||
def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>;
|
||||
|
||||
def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>;
|
||||
def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", AMDGPUffbh_u32, VecALU>;
|
||||
def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>;
|
||||
|
||||
let hasSideEffects = 1 in {
|
||||
|
@ -2026,7 +2026,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
|
||||
|
||||
case ISD::UINT_TO_FP: {
|
||||
return performUCharToFloatCombine(N, DCI);
|
||||
|
||||
}
|
||||
case ISD::FADD: {
|
||||
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
|
||||
break;
|
||||
@ -2108,7 +2108,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
case ISD::LOAD:
|
||||
case ISD::STORE:
|
||||
case ISD::ATOMIC_LOAD:
|
||||
|
@ -144,7 +144,7 @@ defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32",
|
||||
defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
|
||||
|
||||
defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
|
||||
[(set i32:$dst, (ctlz_zero_undef i32:$src0))]
|
||||
[(set i32:$dst, (AMDGPUffbh_u32 i32:$src0))]
|
||||
>;
|
||||
|
||||
defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
|
||||
|
@ -150,3 +150,62 @@ define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)*
|
||||
store i32 %trunc, i32 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_neg1:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; Stores -1 when %val is zero and ctlz(%val) otherwise. The select is keyed on
; "icmp eq %val, 0" and ctlz is called with its i1 flag set to false.
define void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
|
||||
%cmp = icmp eq i32 %val, 0
|
||||
%sel = select i1 %cmp, i32 -1, i32 %ctlz
|
||||
store i32 %sel, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_neg1:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; SI: s_endpgm
|
||||
; Inverted-condition form of the same pattern: the select is keyed on
; "icmp ne %val, 0", so ctlz(%val) is the true value and -1 the false value.
; ctlz is called with its i1 flag set to false.
define void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
|
||||
%cmp = icmp ne i32 %val, 0
|
||||
%sel = select i1 %cmp, i32 %ctlz, i32 -1
|
||||
store i32 %sel, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; TODO: Should be able to eliminate select here as well.
|
||||
; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_bitwidth:
|
||||
; SI: buffer_load_dword
|
||||
; SI: v_ffbh_u32_e32
|
||||
; SI: v_cmp
|
||||
; SI: v_cndmask
|
||||
; SI: s_endpgm
|
||||
; Here the compare is on the ctlz result rather than on %val: -1 is selected
; when ctlz(%val) equals 32, otherwise the ctlz result is stored.
define void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
|
||||
%cmp = icmp eq i32 %ctlz, 32
|
||||
%sel = select i1 %cmp, i32 -1, i32 %ctlz
|
||||
store i32 %sel, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_bitwidth:
|
||||
; SI: buffer_load_dword
|
||||
; SI: v_ffbh_u32_e32
|
||||
; SI: v_cmp
|
||||
; SI: v_cndmask
|
||||
; SI: s_endpgm
|
||||
; Inverted-condition form of the bitwidth compare: the ctlz result is kept when
; it differs from 32, and -1 is selected otherwise.
define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
|
||||
%cmp = icmp ne i32 %ctlz, 32
|
||||
%sel = select i1 %cmp, i32 %ctlz, i32 -1
|
||||
store i32 %sel, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -2,6 +2,8 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
|
||||
|
||||
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
|
||||
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
|
||||
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
|
||||
@ -131,3 +133,123 @@ define void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 add
|
||||
store i32 %trunc, i32 addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
|
||||
; SI-NEXT: buffer_store_dword [[RESULT]],
|
||||
; Stores -1 when %val is zero and ctlz(%val) otherwise. The select is keyed on
; "icmp eq %val, 0" and ctlz is called with its i1 flag set to true.
define void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
|
||||
%cmp = icmp eq i32 %val, 0
|
||||
%sel = select i1 %cmp, i32 -1, i32 %ctlz
|
||||
store i32 %sel, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_neg1:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
|
||||
; SI-NEXT: buffer_store_dword [[RESULT]],
|
||||
; Inverted-condition form: the select is keyed on "icmp ne %val, 0", with
; ctlz(%val) as the true value and -1 as the false value. ctlz is called with
; its i1 flag set to true.
define void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
|
||||
%cmp = icmp ne i32 %val, 0
|
||||
%sel = select i1 %cmp, i32 -1, i32 %ctlz
|
||||
store i32 %sel, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1_two_use:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI-DAG: v_ffbh_u32_e32 [[RESULT0:v[0-9]+]], [[VAL]]
|
||||
; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[VAL]]
|
||||
; SI-DAG: v_cndmask_b32_e64 [[RESULT1:v[0-9]+]], 0, 1, vcc
|
||||
; SI-DAG: buffer_store_dword [[RESULT0]]
|
||||
; SI-DAG: buffer_store_byte [[RESULT1]]
|
||||
; SI: s_endpgm
|
||||
; Same eq/-1 select as the single-use test, but the compare result %cmp has a
; second user: it is also stored (volatile) on its own, so the compare must
; stay live even if the select is folded away.
define void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
|
||||
%cmp = icmp eq i32 %val, 0
|
||||
%sel = select i1 %cmp, i32 -1, i32 %ctlz
|
||||
store volatile i32 %sel, i32 addrspace(1)* %out
|
||||
store volatile i1 %cmp, i1 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; Selected on wrong constant
|
||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_0:
|
||||
; SI: buffer_load_dword
|
||||
; SI: v_ffbh_u32_e32
|
||||
; SI: v_cmp
|
||||
; SI: v_cndmask
|
||||
; SI: buffer_store_dword
|
||||
; The compare is against zero, but the value selected for a zero input is 0
; rather than -1.
define void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
|
||||
%cmp = icmp eq i32 %val, 0
|
||||
%sel = select i1 %cmp, i32 0, i32 %ctlz
|
||||
store i32 %sel, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Selected on wrong constant
|
||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_0:
|
||||
; SI: buffer_load_dword
|
||||
; SI: v_ffbh_u32_e32
|
||||
; SI: v_cmp
|
||||
; SI: v_cndmask
|
||||
; SI: buffer_store_dword
|
||||
; Inverted-condition form of the previous case: "icmp ne %val, 0" keeps the
; ctlz result, and the false value is 0 rather than -1.
define void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
|
||||
%cmp = icmp ne i32 %val, 0
|
||||
%sel = select i1 %cmp, i32 %ctlz, i32 0
|
||||
store i32 %sel, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Compare on wrong constant
|
||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_cmp_non0:
|
||||
; SI: buffer_load_dword
|
||||
; SI: v_ffbh_u32_e32
|
||||
; SI: v_cmp
|
||||
; SI: v_cndmask
|
||||
; SI: buffer_store_dword
|
||||
; %val is compared for equality against 1 instead of 0; the select yields 0
; when the compare is true and the ctlz result otherwise.
define void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
|
||||
%cmp = icmp eq i32 %val, 1
|
||||
%sel = select i1 %cmp, i32 0, i32 %ctlz
|
||||
store i32 %sel, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Compare on wrong constant
|
||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_cmp_non0:
|
||||
; SI: buffer_load_dword
|
||||
; SI: v_ffbh_u32_e32
|
||||
; SI: v_cmp
|
||||
; SI: v_cndmask
|
||||
; SI: buffer_store_dword
|
||||
; %val is compared for inequality against 1 instead of 0; the select yields
; the ctlz result when the compare is true and 0 otherwise.
define void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i32, i32 addrspace(1)* %valptr
|
||||
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
|
||||
%cmp = icmp ne i32 %val, 1
|
||||
%sel = select i1 %cmp, i32 %ctlz, i32 0
|
||||
store i32 %sel, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i8:
|
||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
|
||||
; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
|
||||
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[FFBH]]
|
||||
; SI: buffer_store_dword [[RESULT]],
|
||||
; i8 variant: loads an i8, calls the i8 ctlz intrinsic with its i1 flag set to
; true, and stores the i8 result. There is no compare or select here.
define void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
|
||||
%val = load i8, i8 addrspace(1)* %valptr
|
||||
%ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
|
||||
store i8 %ctlz, i8 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user