mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
R600: Compute masked bits for min and max
llvm-svn: 205242
This commit is contained in:
parent
e806144cb9
commit
c36c1df67d
@ -1219,11 +1219,55 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
}
|
||||
}
|
||||
|
||||
static void computeMaskedBitsForMinMax(const SDValue Op0,
|
||||
const SDValue Op1,
|
||||
APInt &KnownZero,
|
||||
APInt &KnownOne,
|
||||
const SelectionDAG &DAG,
|
||||
unsigned Depth) {
|
||||
APInt Op0Zero, Op0One;
|
||||
APInt Op1Zero, Op1One;
|
||||
DAG.ComputeMaskedBits(Op0, Op0Zero, Op0One, Depth);
|
||||
DAG.ComputeMaskedBits(Op1, Op1Zero, Op1One, Depth);
|
||||
|
||||
KnownZero = Op0Zero & Op1Zero;
|
||||
KnownOne = Op0One & Op1One;
|
||||
}
|
||||
|
||||
void AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
|
||||
const SDValue Op,
|
||||
APInt &KnownZero,
|
||||
APInt &KnownOne,
|
||||
const SelectionDAG &DAG,
|
||||
unsigned Depth) const {
|
||||
|
||||
KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
|
||||
unsigned Opc = Op.getOpcode();
|
||||
switch (Opc) {
|
||||
case ISD::INTRINSIC_WO_CHAIN: {
|
||||
// FIXME: The intrinsic should just use the node.
|
||||
switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
|
||||
case AMDGPUIntrinsic::AMDGPU_imax:
|
||||
case AMDGPUIntrinsic::AMDGPU_umax:
|
||||
case AMDGPUIntrinsic::AMDGPU_imin:
|
||||
case AMDGPUIntrinsic::AMDGPU_umin:
|
||||
computeMaskedBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
|
||||
KnownZero, KnownOne, DAG, Depth);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case AMDGPUISD::SMAX:
|
||||
case AMDGPUISD::UMAX:
|
||||
case AMDGPUISD::SMIN:
|
||||
case AMDGPUISD::UMIN:
|
||||
computeMaskedBitsForMinMax(Op.getOperand(0), Op.getOperand(1),
|
||||
KnownZero, KnownOne, DAG, Depth);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -21,6 +21,21 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; A byte is zero-extended to i32, passed through the umax intrinsic with 0,
; then truncated back to i8 and zero-extended to i16. The checks below expect
; the max result to be stored directly, with no masking instruction between
; the max and the store.
; SI-LABEL: @trunc_zext_umax
; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
; SI: V_MAX_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
; SI-NOT: AND
; SI: BUFFER_STORE_SHORT [[RESULT]],
define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %byte = load i8 addrspace(1)* %src, align 1
  %wide = zext i8 %byte to i32
  %max = tail call i32 @llvm.AMDGPU.umax(i32 %wide, i32 0) nounwind readnone
  %narrow = trunc i32 %max to i8
  %widened = zext i8 %narrow to i16
  store i16 %widened, i16 addrspace(1)* %out, align 2
  ret void
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare i32 @llvm.AMDGPU.umax(i32, i32) #1
|
||||
|
||||
|
@ -21,6 +21,21 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; A byte is zero-extended to i32, passed through the umin intrinsic with 0,
; then truncated back to i8 and zero-extended to i16. The checks below expect
; the min result to be stored directly, with no masking instruction between
; the min and the store.
; SI-LABEL: @trunc_zext_umin
; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
; SI: V_MIN_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
; SI-NOT: AND
; SI: BUFFER_STORE_SHORT [[RESULT]],
define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %byte = load i8 addrspace(1)* %src, align 1
  %wide = zext i8 %byte to i32
  %min = tail call i32 @llvm.AMDGPU.umin(i32 %wide, i32 0) nounwind readnone
  %narrow = trunc i32 %min to i8
  %widened = zext i8 %narrow to i16
  store i16 %widened, i16 addrspace(1)* %out, align 2
  ret void
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare i32 @llvm.AMDGPU.umin(i32, i32) #1
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user