[AMDGPU] Add simplification/combines for llvm.amdgcn.fma.legacy
This follows on from D89558, which added the new intrinsic, and D88955, which added similar combines for llvm.amdgcn.fmul.legacy.

Differential Revision: https://reviews.llvm.org/D90028
commit 9321aed101
parent 91be48b03e
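
For illustration only (not part of the commit), a minimal IR sketch of the two new folds, mirroring the tests added below; the function names @fold_zero and @fold_to_fma are placeholders.

; Illustrative only: what instcombine now does with llvm.amdgcn.fma.legacy.
declare float @llvm.amdgcn.fma.legacy(float, float, float)

define float @fold_zero(float %x, float %z) {
  ; One multiplicand is +/-0.0: folds to  fadd float %z, 0.000000e+00
  ; (not to a bare %z, so a -0.0 %z still becomes +0.0).
  %r = call float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
  ret float %r
}

define float @fold_to_fma(float %x, float %z) {
  ; One multiplicand is a finite non-zero constant: becomes
  ; call float @llvm.fma.f32(float %x, float 9.950000e+01, float %z).
  %r = call float @llvm.amdgcn.fma.legacy(float %x, float 99.5, float %z)
  ret float %r
}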
@@ -163,6 +163,27 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
   return IC.replaceInstUsesWith(II, NewCall);
 }
 
+bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
+                                           InstCombiner &IC) const {
+  // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
+  // infinity, gives +0.0. If we can prove we don't have one of the special
+  // cases then we can use a normal multiply instead.
+  // TODO: Create and use isKnownFiniteNonZero instead of just matching
+  // constants here.
+  if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
+      match(Op1, PatternMatch::m_FiniteNonZero())) {
+    // One operand is not zero or infinity or NaN.
+    return true;
+  }
+  auto *TLI = &IC.getTargetLibraryInfo();
+  if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
+      isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
+    // Neither operand is infinity or NaN.
+    return true;
+  }
+  return false;
+}
+
 Optional<Instruction *>
 GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
   Intrinsic::ID IID = II.getIntrinsicID();
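
An illustrative aside (not part of the patch) on why canSimplifyLegacyMulToMul must guard the rewrite: the legacy semantics differ from IEEE exactly in the zero-times-special cases. The function name @legacy_vs_ieee is a placeholder.

; Illustrative only: why the guard is needed before rewriting legacy mul/fma.
declare float @llvm.amdgcn.fmul.legacy(float, float)

define float @legacy_vs_ieee() {
  ; Legacy semantics: +/-0.0 times anything, even infinity or NaN, is +0.0,
  ; so this call evaluates to +0.0 ...
  %a = call float @llvm.amdgcn.fmul.legacy(float 0.0, float 0x7FF0000000000000)
  ; ... whereas the IEEE product 0.0 * +inf is NaN, so a plain fmul is not
  ; equivalent unless the special cases are ruled out first.
  %b = fmul float 0.0, 0x7FF0000000000000
  %c = fadd float %a, %b
  ret float %c
}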
@@ -836,26 +857,40 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
 
     // If we can prove we don't have one of the special cases then we can use a
     // normal fmul instruction instead.
-    auto *TLI = &IC.getTargetLibraryInfo();
-    bool CanSimplifyToMul = false;
-    // TODO: Create and use isKnownFiniteNonZero instead of just matching
-    // constants here.
-    if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
-        match(Op1, PatternMatch::m_FiniteNonZero())) {
-      // One operand is not zero or infinity or NaN.
-      CanSimplifyToMul = true;
-    } else if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
-               isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
-      // Neither operand is infinity or NaN.
-      CanSimplifyToMul = true;
-    }
-    if (CanSimplifyToMul) {
+    if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
       auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
       FMul->takeName(&II);
       return IC.replaceInstUsesWith(II, FMul);
     }
     break;
   }
+  case Intrinsic::amdgcn_fma_legacy: {
+    Value *Op0 = II.getArgOperand(0);
+    Value *Op1 = II.getArgOperand(1);
+    Value *Op2 = II.getArgOperand(2);
+
+    // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
+    // infinity, gives +0.0.
+    // TODO: Move to InstSimplify?
+    if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
+        match(Op1, PatternMatch::m_AnyZeroFP())) {
+      // It's tempting to just return Op2 here, but that would give the wrong
+      // result if Op2 was -0.0.
+      auto *Zero = ConstantFP::getNullValue(II.getType());
+      auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
+      FAdd->takeName(&II);
+      return IC.replaceInstUsesWith(II, FAdd);
+    }
+
+    // If we can prove we don't have one of the special cases then we can use a
+    // normal fma instead.
+    if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
+      II.setCalledOperand(Intrinsic::getDeclaration(
+          II.getModule(), Intrinsic::fma, II.getType()));
+      return &II;
+    }
+    break;
+  }
   default: {
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
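
A worked aside (not part of the diff) on the signed-zero subtlety mentioned in the comment above: when a multiplicand is +/-0.0 the whole call cannot simply be replaced with Op2. The function name @keep_the_fadd is a placeholder.

; Illustrative only: why the zero case emits an fadd instead of returning %z.
define float @keep_the_fadd(float %z) {
  ; If %z is -0.0, returning %z would give -0.0, but the legacy fma computes
  ; (+0.0 * x) + %z, and under the default rounding mode +0.0 + -0.0 is +0.0.
  %r = fadd float %z, 0.000000e+00
  ; Only with the nsz flag (see test_zero_nsz below) is it valid to drop the
  ; fadd and return %z directly.
  ret float %r
}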
@@ -227,6 +227,8 @@ public:
   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                           Value *NewV) const;
 
+  bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
+                                 InstCombiner &IC) const;
   Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                IntrinsicInst &II) const;
   Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
test/Transforms/InstCombine/AMDGPU/fma_legacy.ll (new file, 86 lines)
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -instcombine -S | FileCheck %s
+
+; Simplify to +0.0 + z.
+define float @test_zero(float %x, float %z) {
+; CHECK-LABEL: @test_zero(
+; CHECK-NEXT:    [[CALL:%.*]] = fadd float [[Z:%.*]], 0.000000e+00
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
+  ret float %call
+}
+
+; Simplify to +0.0 + z, preserving fmf.
+define float @test_zero_fmf(float %x, float %z) {
+; CHECK-LABEL: @test_zero_fmf(
+; CHECK-NEXT:    [[CALL:%.*]] = fadd contract float [[Z:%.*]], 0.000000e+00
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call contract float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
+  ret float %call
+}
+
+; Simplify to z.
+define float @test_zero_nsz(float %x, float %z) {
+; CHECK-LABEL: @test_zero_nsz(
+; CHECK-NEXT:    ret float [[Z:%.*]]
+;
+  %call = call nsz float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
+  ret float %call
+}
+
+; Simplify to +0.0 + z.
+define float @test_negzero(float %y, float %z) {
+; CHECK-LABEL: @test_negzero(
+; CHECK-NEXT:    [[CALL:%.*]] = fadd float [[Z:%.*]], 0.000000e+00
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call float @llvm.amdgcn.fma.legacy(float -0.0, float %y, float %z)
+  ret float %call
+}
+
+; Simplify to z.
+define float @test_negzero_nsz(float %y, float %z) {
+; CHECK-LABEL: @test_negzero_nsz(
+; CHECK-NEXT:    ret float [[Z:%.*]]
+;
+  %call = call nsz float @llvm.amdgcn.fma.legacy(float -0.0, float %y, float %z)
+  ret float %call
+}
+
+; Combine to fma because the constant is finite and non-zero.
+define float @test_const(float %x, float %z) {
+; CHECK-LABEL: @test_const(
+; CHECK-NEXT:    [[CALL:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float 9.950000e+01, float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call float @llvm.amdgcn.fma.legacy(float %x, float 99.5, float %z)
+  ret float %call
+}
+
+; Combine to fma because the constant is finite and non-zero, preserving fmf.
+define float @test_const_fmf(float %x, float %z) {
+; CHECK-LABEL: @test_const_fmf(
+; CHECK-NEXT:    [[CALL:%.*]] = call contract float @llvm.fma.f32(float [[X:%.*]], float 9.950000e+01, float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call contract float @llvm.amdgcn.fma.legacy(float %x, float 99.5, float %z)
+  ret float %call
+}
+
+; Combine to fma because neither argument can be infinity or NaN.
+define float @test_finite(i32 %x, i32 %y, float %z) {
+; CHECK-LABEL: @test_finite(
+; CHECK-NEXT:    [[XF:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[YF:%.*]] = sitofp i32 [[Y:%.*]] to float
+; CHECK-NEXT:    [[CALL:%.*]] = call float @llvm.fma.f32(float [[XF]], float [[YF]], float [[Z:%.*]])
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %xf = sitofp i32 %x to float
+  %yf = sitofp i32 %y to float
+  %call = call float @llvm.amdgcn.fma.legacy(float %xf, float %yf, float %z)
+  ret float %call
+}
+
+declare float @llvm.amdgcn.fma.legacy(float, float, float)
@@ -29,6 +29,16 @@ define float @test_const(float %x) {
   ret float %call
 }
 
+; Combine to fmul because the constant is finite and non-zero, preserving fmf.
+define float @test_const_fmf(float %x) {
+; CHECK-LABEL: @test_const_fmf(
+; CHECK-NEXT:    [[CALL:%.*]] = fmul contract float [[X:%.*]], 9.950000e+01
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call contract float @llvm.amdgcn.fmul.legacy(float %x, float 99.5)
+  ret float %call
+}
+
 ; Combine to fmul because neither argument can be infinity or NaN.
 define float @test_finite(i32 %x, i32 %y) {
 ; CHECK-LABEL: @test_finite(