AMDGPU/R600: Remove code for handling AMDGPUISD::CLAMP
Summary:
We don't generate AMDGPUISD::CLAMP for R600 now that llvm.AMDGPU.clamp is gone.

Reviewers: arsenm, nhaehnle

Reviewed By: arsenm

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D47181

llvm-svn: 333153
parent a2c6eb8268
commit 4b5731b4b0
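For reference while reading the diff: a minimal, self-contained sketch (plain C++, deliberately not the LLVM APFloat API) of the constant fold that performClampCombine performs — the function this commit moves, unchanged, from the shared AMDGPUTargetLowering into SITargetLowering. The helper name foldClamp and the use of double are illustrative assumptions, not code from the commit.

#include <cmath>
#include <cstdio>

// clamp(c) folds to 0.0 for c < 0, to 1.0 for c > 1, and, when DX10
// clamp semantics are enabled (Subtarget->enableDX10Clamp() in the
// diff), NaN inputs also fold to 0.0; otherwise a NaN is left as-is.
static double foldClamp(double C, bool DX10Clamp) {
  if (C < 0.0 || (std::isnan(C) && DX10Clamp))
    return 0.0; // fold to the lower bound
  if (C > 1.0)
    return 1.0; // fold to the upper bound
  return C;     // already in [0, 1], or a NaN that must survive
}

int main() {
  std::printf("%g\n", foldClamp(-2.5, true));         // 0
  std::printf("%g\n", foldClamp(3.0, true));          // 1
  std::printf("%g\n", foldClamp(0.25, true));         // 0.25
  std::printf("%g\n", foldClamp(std::nan(""), true)); // 0 under DX10 clamp
}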
lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2898,28 +2898,6 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
                             SN->getBasePtr(), SN->getMemOperand());
 }
 
-SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N,
-                                                  DAGCombinerInfo &DCI) const {
-  ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
-  if (!CSrc)
-    return SDValue();
-
-  const APFloat &F = CSrc->getValueAPF();
-  APFloat Zero = APFloat::getZero(F.getSemantics());
-  APFloat::cmpResult Cmp0 = F.compare(Zero);
-  if (Cmp0 == APFloat::cmpLessThan ||
-      (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
-    return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
-  }
-
-  APFloat One(F.getSemantics(), "1.0");
-  APFloat::cmpResult Cmp1 = F.compare(One);
-  if (Cmp1 == APFloat::cmpGreaterThan)
-    return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0));
-
-  return SDValue(CSrc, 0);
-}
-
 // FIXME: This should go in generic DAG combiner with an isTruncateFree check,
 // but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU
 // issues.
@@ -3960,8 +3938,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
     return performLoadCombine(N, DCI);
   case ISD::STORE:
     return performStoreCombine(N, DCI);
-  case AMDGPUISD::CLAMP:
-    return performClampCombine(N, DCI);
   case AMDGPUISD::RCP: {
     if (const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) {
       // XXX - Should this flush denormals?
lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -78,7 +78,6 @@ protected:
   bool shouldCombineMemoryType(EVT VT) const;
   SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-  SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
   SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -168,8 +168,6 @@ def AMDGPUfmul_legacy : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
                                [SDNPCommutative, SDNPAssociative]
 >;
 
-def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
-
 // out = min(a, b) a and b are floats, where a nan comparison fails.
 def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
   []
lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -287,13 +287,6 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
       return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
     }
     break;
-  case AMDGPU::CLAMP_R600: {
-    MachineInstr *NewMI = TII->buildDefaultInstruction(
-        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
-        MI.getOperand(1).getReg());
-    TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP);
-    break;
-  }
 
   case AMDGPU::FABS_R600: {
     MachineInstr *NewMI = TII->buildDefaultInstruction(
@@ -2180,20 +2173,6 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
       if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
         return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
     }
-  } else if (Opcode == AMDGPU::CLAMP_R600) {
-    SDValue Src = Node->getOperand(0);
-    if (!Src.isMachineOpcode() ||
-        !TII->hasInstrModifiers(Src.getMachineOpcode()))
-      return Node;
-    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
-        AMDGPU::OpName::clamp);
-    if (ClampIdx < 0)
-      return Node;
-    SDLoc DL(Node);
-    std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
-    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
-    return DAG.getMachineNode(Src.getMachineOpcode(), DL,
-                              Node->getVTList(), Ops);
   } else {
     if (!TII->hasInstrModifiers(Opcode))
       return Node;
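The R600 PostISelFolding code deleted above implemented a peephole: instead of keeping a standalone CLAMP_R600 node, set the 'clamp' output modifier on the instruction that defines its source, when that instruction supports modifiers. A hypothetical C++ sketch of that shape follows; the types Instr and foldClampIntoSource are invented stand-ins for MachineSDNode and the R600InstrInfo queries, not LLVM API.

#include <optional>
#include <vector>

struct Instr {
  bool HasModifiers;           // models TII->hasInstrModifiers(Opcode)
  std::optional<int> ClampIdx; // models TII->getOperandIdx(..., clamp)
  std::vector<int> Ops;        // operand list of the defining instruction
};

// Returns true if the clamp bit was folded into Src, so the standalone
// CLAMP node can be replaced by the modified defining instruction.
static bool foldClampIntoSource(Instr &Src) {
  if (!Src.HasModifiers || !Src.ClampIdx || *Src.ClampIdx < 0)
    return false; // keep the explicit CLAMP
  // Same off-by-one as the diff: the SDNode operand list has no def
  // operand, so the machine operand index is shifted down by one.
  Src.Ops[*Src.ClampIdx - 1] = 1;
  return true;
}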
lib/Target/AMDGPU/R600Instructions.td
@@ -663,13 +663,6 @@ let isCodeGenOnly = 1, isPseudo = 1 in {
 
 let usesCustomInserter = 1 in {
 
-class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
-  (outs rc:$dst),
-  (ins rc:$src0),
-  "CLAMP $dst, $src0",
-  [(set f32:$dst, (AMDGPUclamp f32:$src0))]
->;
-
 class FABS <RegisterClass rc> : AMDGPUShaderInst <
   (outs rc:$dst),
   (ins rc:$src0),
@@ -1194,7 +1187,6 @@ class COS_Common <bits<11> inst> : R600_1OP <
   let Itinerary = TransALU;
 }
 
-def CLAMP_R600 : CLAMP <R600_Reg32>;
 def FABS_R600 : FABS<R600_Reg32>;
 def FNEG_R600 : FNEG<R600_Reg32>;
 
lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7087,6 +7087,29 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
   return SDValue();
 }
 
+SDValue SITargetLowering::performClampCombine(SDNode *N,
+                                              DAGCombinerInfo &DCI) const {
+  ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
+  if (!CSrc)
+    return SDValue();
+
+  const APFloat &F = CSrc->getValueAPF();
+  APFloat Zero = APFloat::getZero(F.getSemantics());
+  APFloat::cmpResult Cmp0 = F.compare(Zero);
+  if (Cmp0 == APFloat::cmpLessThan ||
+      (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
+    return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
+  }
+
+  APFloat One(F.getSemantics(), "1.0");
+  APFloat::cmpResult Cmp1 = F.compare(One);
+  if (Cmp1 == APFloat::cmpGreaterThan)
+    return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0));
+
+  return SDValue(CSrc, 0);
+}
+
+
 SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
@@ -7179,6 +7202,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
     return performFMed3Combine(N, DCI);
   case AMDGPUISD::CVT_PKRTZ_F16_F32:
     return performCvtPkRTZCombine(N, DCI);
+  case AMDGPUISD::CLAMP:
+    return performClampCombine(N, DCI);
   case ISD::SCALAR_TO_VECTOR: {
     SelectionDAG &DAG = DCI.DAG;
     EVT VT = N->getValueType(0);
lib/Target/AMDGPU/SIISelLowering.h
@@ -131,6 +131,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
   bool isLegalFlatAddressingMode(const AddrMode &AM) const;
   bool isLegalGlobalAddressingMode(const AddrMode &AM) const;
lib/Target/AMDGPU/SIInstrInfo.td
@@ -33,6 +33,8 @@ def SIEncodingFamily {
 // SI DAG Nodes
 //===----------------------------------------------------------------------===//
 
+def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
+
 def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
   SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
   [SDNPMayLoad, SDNPMemOperand]