1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

AMDGPU/R600: Remove code for handling AMDGPUISD::CLAMP

Summary:
We don't generate AMDGPUISD::CLAMP for R600 now that llvm.AMDGPU.clamp
is gone.

Reviewers: arsenm, nhaehnle

Reviewed By: arsenm

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D47181

llvm-svn: 333153
This commit is contained in:
Tom Stellard 2018-05-24 05:28:34 +00:00
parent a2c6eb8268
commit 4b5731b4b0
8 changed files with 28 additions and 56 deletions

View File

@ -2898,28 +2898,6 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
SN->getBasePtr(), SN->getMemOperand());
}
SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
if (!CSrc)
return SDValue();
const APFloat &F = CSrc->getValueAPF();
APFloat Zero = APFloat::getZero(F.getSemantics());
APFloat::cmpResult Cmp0 = F.compare(Zero);
if (Cmp0 == APFloat::cmpLessThan ||
(Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
}
APFloat One(F.getSemantics(), "1.0");
APFloat::cmpResult Cmp1 = F.compare(One);
if (Cmp1 == APFloat::cmpGreaterThan)
return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0));
return SDValue(CSrc, 0);
}
// FIXME: This should go in generic DAG combiner with an isTruncateFree check,
// but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU
// issues.
@ -3960,8 +3938,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performLoadCombine(N, DCI);
case ISD::STORE:
return performStoreCombine(N, DCI);
case AMDGPUISD::CLAMP:
return performClampCombine(N, DCI);
case AMDGPUISD::RCP: {
if (const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) {
// XXX - Should this flush denormals?

View File

@ -78,7 +78,6 @@ protected:
bool shouldCombineMemoryType(EVT VT) const;
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,

View File

@ -168,8 +168,6 @@ def AMDGPUfmul_legacy : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]
>;
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
// out = min(a, b) a and b are floats, where a nan comparison fails.
def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
[]

View File

@ -287,13 +287,6 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
break;
case AMDGPU::CLAMP_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(
*BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
MI.getOperand(1).getReg());
TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP);
break;
}
case AMDGPU::FABS_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(
@ -2180,20 +2173,6 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
} else if (Opcode == AMDGPU::CLAMP_R600) {
SDValue Src = Node->getOperand(0);
if (!Src.isMachineOpcode() ||
!TII->hasInstrModifiers(Src.getMachineOpcode()))
return Node;
int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
AMDGPU::OpName::clamp);
if (ClampIdx < 0)
return Node;
SDLoc DL(Node);
std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
return DAG.getMachineNode(Src.getMachineOpcode(), DL,
Node->getVTList(), Ops);
} else {
if (!TII->hasInstrModifiers(Opcode))
return Node;

View File

@ -663,13 +663,6 @@ let isCodeGenOnly = 1, isPseudo = 1 in {
let usesCustomInserter = 1 in {
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"CLAMP $dst, $src0",
[(set f32:$dst, (AMDGPUclamp f32:$src0))]
>;
class FABS <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
@ -1194,7 +1187,6 @@ class COS_Common <bits<11> inst> : R600_1OP <
let Itinerary = TransALU;
}
def CLAMP_R600 : CLAMP <R600_Reg32>;
def FABS_R600 : FABS<R600_Reg32>;
def FNEG_R600 : FNEG<R600_Reg32>;

View File

@ -7087,6 +7087,29 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
return SDValue();
}
SDValue SITargetLowering::performClampCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
if (!CSrc)
return SDValue();
const APFloat &F = CSrc->getValueAPF();
APFloat Zero = APFloat::getZero(F.getSemantics());
APFloat::cmpResult Cmp0 = F.compare(Zero);
if (Cmp0 == APFloat::cmpLessThan ||
(Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
}
APFloat One(F.getSemantics(), "1.0");
APFloat::cmpResult Cmp1 = F.compare(One);
if (Cmp1 == APFloat::cmpGreaterThan)
return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0));
return SDValue(CSrc, 0);
}
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
@ -7179,6 +7202,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performFMed3Combine(N, DCI);
case AMDGPUISD::CVT_PKRTZ_F16_F32:
return performCvtPkRTZCombine(N, DCI);
case AMDGPUISD::CLAMP:
return performClampCombine(N, DCI);
case ISD::SCALAR_TO_VECTOR: {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);

View File

@ -131,6 +131,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
bool isLegalGlobalAddressingMode(const AddrMode &AM) const;

View File

@ -33,6 +33,8 @@ def SIEncodingFamily {
// SI DAG Nodes
//===----------------------------------------------------------------------===//
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
[SDNPMayLoad, SDNPMemOperand]