mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
AMDGPU: Address todo for handling 1/(2 pi)
llvm-svn: 339814
This commit is contained in:
parent
cd6d7bb841
commit
d8fe316d41
@ -3449,9 +3449,27 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
|
||||
return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
|
||||
}
|
||||
|
||||
static bool isConstantFPZero(SDValue N) {
|
||||
if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
|
||||
return C->isZero() && !C->isNegative();
|
||||
static bool isInv2Pi(const APFloat &APF) {
|
||||
static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
|
||||
static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
|
||||
static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882));
|
||||
|
||||
return APF.bitwiseIsEqual(KF16) ||
|
||||
APF.bitwiseIsEqual(KF32) ||
|
||||
APF.bitwiseIsEqual(KF64);
|
||||
}
|
||||
|
||||
// 0 and 1.0 / (2.0 * pi) do not have negated inline immediates, so there is an
|
||||
// additional cost to negate them.
|
||||
/// Returns true when \p N is a floating-point constant (or constant splat)
/// that is either +0.0 or, on subtargets reporting hasInv2PiInlineImm(),
/// the 1/(2*pi) value recognised by isInv2Pi(). Returns false for every other
/// operand, including non-constants.
bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const {
  const ConstantFPSDNode *CFP = isConstOrConstSplatFP(N);
  if (!CFP)
    return false;

  // Only positive zero qualifies here; -0.0 is not treated as costly.
  if (CFP->isZero() && !CFP->isNegative())
    return true;

  // The 1/(2*pi) case is only considered when the subtarget has that inline
  // immediate in the first place.
  return Subtarget->hasInv2PiInlineImm() && isInv2Pi(CFP->getValueAPF());
}
|
||||
|
||||
@ -3577,9 +3595,8 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
|
||||
SDValue RHS = N0.getOperand(1);
|
||||
|
||||
// 0 doesn't have a negated inline immediate.
|
||||
// TODO: Shouldn't fold 1/2pi either, and should be generalized to other
|
||||
// operations.
|
||||
if (isConstantFPZero(RHS))
|
||||
// TODO: This constant check should be generalized to other operations.
|
||||
if (isConstantCostlierToNegate(RHS))
|
||||
return SDValue();
|
||||
|
||||
SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
|
||||
|
@ -95,6 +95,8 @@ protected:
|
||||
SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
|
||||
SDValue RHS, DAGCombinerInfo &DCI) const;
|
||||
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
|
||||
bool isConstantCostlierToNegate(SDValue N) const;
|
||||
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
|
@ -136,6 +136,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
|
||||
HasVOP3PInsts(false),
|
||||
HasMulI24(true),
|
||||
HasMulU24(true),
|
||||
HasInv2PiInlineImm(false),
|
||||
HasFminFmaxLegacy(true),
|
||||
EnablePromoteAlloca(false),
|
||||
LocalMemorySize(0),
|
||||
@ -190,7 +191,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
HasVGPRIndexMode(false),
|
||||
HasScalarStores(false),
|
||||
HasScalarAtomics(false),
|
||||
HasInv2PiInlineImm(false),
|
||||
HasSDWAOmod(false),
|
||||
HasSDWAScalar(false),
|
||||
HasSDWASdst(false),
|
||||
|
@ -72,6 +72,7 @@ protected:
|
||||
bool HasVOP3PInsts;
|
||||
bool HasMulI24;
|
||||
bool HasMulU24;
|
||||
bool HasInv2PiInlineImm;
|
||||
bool HasFminFmaxLegacy;
|
||||
bool EnablePromoteAlloca;
|
||||
int LocalMemorySize;
|
||||
@ -170,6 +171,10 @@ public:
|
||||
return HasMulU24;
|
||||
}
|
||||
|
||||
bool hasInv2PiInlineImm() const {
|
||||
return HasInv2PiInlineImm;
|
||||
}
|
||||
|
||||
bool hasFminFmaxLegacy() const {
|
||||
return HasFminFmaxLegacy;
|
||||
}
|
||||
@ -347,7 +352,6 @@ protected:
|
||||
bool HasVGPRIndexMode;
|
||||
bool HasScalarStores;
|
||||
bool HasScalarAtomics;
|
||||
bool HasInv2PiInlineImm;
|
||||
bool HasSDWAOmod;
|
||||
bool HasSDWAScalar;
|
||||
bool HasSDWASdst;
|
||||
@ -782,9 +786,6 @@ public:
|
||||
return HasScalarAtomics;
|
||||
}
|
||||
|
||||
bool hasInv2PiInlineImm() const {
|
||||
return HasInv2PiInlineImm;
|
||||
}
|
||||
|
||||
bool hasDPP() const {
|
||||
return HasDPP;
|
||||
|
File diff suppressed because it is too large
Load Diff
28
test/CodeGen/AMDGPU/fneg-combines.si.ll
Normal file
28
test/CodeGen/AMDGPU/fneg-combines.si.ll
Normal file
@ -0,0 +1,28 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
; --------------------------------------------------------------------------------
|
||||
; rcp_legacy tests
|
||||
; --------------------------------------------------------------------------------
|
||||
|
||||
; GCN-LABEL: {{^}}v_fneg_rcp_legacy_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
|
||||
; GCN: v_rcp_legacy_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
|
||||
; GCN: {{buffer|flat}}_store_dword [[RESULT]]
|
||||
; Kernel under test: loads a.ptr[tid], applies llvm.amdgcn.rcp.legacy, negates
; the result and stores it to out[tid]. The check lines above expect the
; negation to appear as a source modifier on the v_rcp_legacy_f32 operand.
define amdgpu_kernel void @v_fneg_rcp_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
|
||||
; Index both buffers by the sign-extended workitem id.
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
%a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
|
||||
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
|
||||
; The input is read with a volatile load.
%a = load volatile float, float addrspace(1)* %a.gep
|
||||
%rcp = call float @llvm.amdgcn.rcp.legacy(float %a)
|
||||
; Negation is written as a subtraction from -0.0.
%fneg = fsub float -0.000000e+00, %rcp
|
||||
store float %fneg, float addrspace(1)* %out.gep
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
declare float @llvm.amdgcn.rcp.legacy(float) #1
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
Loading…
x
Reference in New Issue
Block a user