1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

AMDGPU: Remove more unused intrinsics

Replace uses of lrp in tests with the basic IR expansion

llvm-svn: 258612
This commit is contained in:
Matt Arsenault 2016-01-23 05:42:38 +00:00
parent 7a3addc91c
commit fe8ee22547
12 changed files with 1950 additions and 1907 deletions

View File

@ -917,17 +917,11 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
switch (IntrinsicID) {
default: return Op;
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
case AMDGPUIntrinsic::AMDGPU_clamp:
case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
case Intrinsic::AMDGPU_rsq_clamped:
assert(Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS);
return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
@ -983,23 +977,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
}
/// Linear Interpolation
/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
// TODO: Should this propagate fast-math-flags?
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
DAG.getConstantFP(1.0f, DL, MVT::f32),
Op.getOperand(1));
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
Op.getOperand(3));
return DAG.getNode(ISD::FADD, DL, VT,
DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
OneSubAC);
}
/// \brief Generate Min/Max node
SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
EVT VT,

View File

@ -176,7 +176,6 @@ public:
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
SDValue CombineFMinMaxLegacy(SDLoc DL,
EVT VT,
SDValue LHS,

View File

@ -12,34 +12,11 @@
//===----------------------------------------------------------------------===//
let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_abs : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
// This is named backwards (instead of rsq_legacy) so we don't have
// to define it with the public builtins intrinsics. This is a
// workaround for how intrinsic names are parsed. If the name is
// llvm.AMDGPU.rsq.legacy, the parser assumes that you meant
// llvm.AMDGPU.rsq.{f32 | f64} and incorrectly mangled the name.
def int_AMDGPU_legacy_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

View File

@ -615,12 +615,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
case AMDGPUIntrinsic::AMDGPU_store_output: {
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
MFI->LiveOuts.push_back(Reg);
return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
}
case AMDGPUIntrinsic::R600_store_swizzle: {
SDLoc DL(Op);
const SDValue Args[8] = {

View File

@ -160,7 +160,8 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
let Inst{63-32} = Word1;
}
class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
class R600_2OP_Helper <bits<11> inst, string opName,
SDPatternOperator node = null_frag,
InstrItinClass itin = AnyALU> :
R600_2OP <inst, opName,
[(set R600_Reg32:$dst, (node R600_Reg32:$src0,
@ -678,7 +679,7 @@ let Predicates = [isR600toCayman] in {
def ADD : R600_2OP_Helper <0x0, "ADD", fadd>;
// Non-IEEE MUL: 0 * anything = 0
def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE">;
def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
// TODO: Do these actually match the regular fmin/fmax behavior?
def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>;
@ -1135,11 +1136,6 @@ def FNEG_R600 : FNEG<R600_Reg32>;
// FIXME: Should be predicated on unsafe fp math.
multiclass DIV_Common <InstR600 recip_ieee> {
def : Pat<
(int_AMDGPU_div f32:$src0, f32:$src1),
(MUL_IEEE $src0, (recip_ieee $src1))
>;
def : Pat<
(fdiv f32:$src0, f32:$src1),
(MUL_IEEE $src0, (recip_ieee $src1))

View File

@ -1450,7 +1450,7 @@ defm V_SUBREV_F32 : VOP2Inst <vop2<0x5, 0x3>, "v_subrev_f32",
let isCommutable = 1 in {
defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7, 0x4>, "v_mul_legacy_f32",
VOP_F32_F32_F32, int_AMDGPU_mul
VOP_F32_F32_F32
>;
defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
@ -2049,13 +2049,6 @@ def SI_CONSTDATA_PTR : InstSI <
let Predicates = [isGCN] in {
def : Pat<
(int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
(V_CNDMASK_B32_e64 $src2, $src1,
(V_CMP_GT_F32_e64 SRCMODS.NONE, 0, SRCMODS.NONE, $src0,
DSTCLAMP.NONE, DSTOMOD.NONE))
>;
def : Pat <
(int_AMDGPU_kilp),
(SI_KILL 0xbf800000)
@ -2712,11 +2705,6 @@ def : Pat <
/* llvm.AMDGPU.pow */
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
def : Pat <
(int_AMDGPU_div f32:$src0, f32:$src1),
(V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
>;
def : Pat <
(int_AMDGPU_cube v4f32:$src),
(REG_SEQUENCE VReg_128,

File diff suppressed because it is too large Load Diff

View File

@ -1,13 +0,0 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.AMDGPU.legacy.rsq(float) nounwind readnone
; FUNC-LABEL: {{^}}rsq_legacy_f32:
; SI: v_rsq_legacy_f32_e32
; EG: RECIPSQRT_IEEE
define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
%rsq = call float @llvm.AMDGPU.legacy.rsq(float %src) nounwind readnone
store float %rsq, float addrspace(1)* %out, align 4
ret void
}

View File

@ -1,17 +0,0 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(<4 x float> inreg %reg0) #0 {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = extractelement <4 x float> %reg0, i32 1
%r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1)
%vec = insertelement <4 x float> undef, float %r2, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
declare float @llvm.AMDGPU.mul(float ,float ) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { "ShaderType"="0" }

View File

@ -1,13 +0,0 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone
; FUNC-LABEL: {{^}}test_lrp:
; SI: v_mad_f32
; SI: v_mac_f32_e32
define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind {
%mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone
store float %mad, float addrspace(1)* %out, align 4
ret void
}

File diff suppressed because it is too large Load Diff

View File

@ -3,10 +3,10 @@
; If this occurs it is likely due to reordering and the restore was
; originally supposed to happen before SI_END_CF.
; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
; SI-NOT: v_readlane_b32 [[SAVED]]
define void @main() #0 {
define void @main() #1 {
main_body:
%0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
%1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
@ -84,180 +84,182 @@ LOOP: ; preds = %ENDIF2795, %main_bo
br i1 %67, label %ENDLOOP, label %ENDIF
ENDLOOP: ; preds = %ELSE2566, %LOOP
%68 = call float @llvm.AMDGPU.lrp(float %0, float undef, float undef)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float undef, float %68, float undef, float 1.000000e+00)
%one.sub.a.i = fsub float 1.000000e+00, %0
%one.sub.ac.i = fmul float %one.sub.a.i, undef
%result.i = fadd float fmul (float undef, float undef), %one.sub.ac.i
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float undef, float %result.i, float undef, float 1.000000e+00)
ret void
ENDIF: ; preds = %LOOP
%69 = fsub float %2, undef
%70 = fsub float %3, undef
%71 = fsub float %4, undef
%72 = fmul float %69, 0.000000e+00
%68 = fsub float %2, undef
%69 = fsub float %3, undef
%70 = fsub float %4, undef
%71 = fmul float %68, 0.000000e+00
%72 = fmul float %69, undef
%73 = fmul float %70, undef
%74 = fmul float %71, undef
%75 = fsub float %6, undef
%76 = fsub float %7, undef
%77 = fmul float %75, undef
%78 = fmul float %76, 0.000000e+00
%79 = call float @llvm.minnum.f32(float %74, float %78)
%80 = call float @llvm.maxnum.f32(float %72, float 0.000000e+00)
%81 = call float @llvm.maxnum.f32(float %73, float %77)
%82 = call float @llvm.maxnum.f32(float undef, float %79)
%83 = call float @llvm.minnum.f32(float %80, float %81)
%84 = call float @llvm.minnum.f32(float %83, float undef)
%85 = fsub float %14, undef
%86 = fsub float %15, undef
%87 = fsub float %16, undef
%74 = fsub float %6, undef
%75 = fsub float %7, undef
%76 = fmul float %74, undef
%77 = fmul float %75, 0.000000e+00
%78 = call float @llvm.minnum.f32(float %73, float %77)
%79 = call float @llvm.maxnum.f32(float %71, float 0.000000e+00)
%80 = call float @llvm.maxnum.f32(float %72, float %76)
%81 = call float @llvm.maxnum.f32(float undef, float %78)
%82 = call float @llvm.minnum.f32(float %79, float %80)
%83 = call float @llvm.minnum.f32(float %82, float undef)
%84 = fsub float %14, undef
%85 = fsub float %15, undef
%86 = fsub float %16, undef
%87 = fmul float %84, undef
%88 = fmul float %85, undef
%89 = fmul float %86, undef
%90 = fmul float %87, undef
%91 = fsub float %17, undef
%92 = fsub float %18, undef
%93 = fsub float %19, undef
%94 = fmul float %91, 0.000000e+00
%90 = fsub float %17, undef
%91 = fsub float %18, undef
%92 = fsub float %19, undef
%93 = fmul float %90, 0.000000e+00
%94 = fmul float %91, undef
%95 = fmul float %92, undef
%96 = fmul float %93, undef
%97 = call float @llvm.minnum.f32(float %89, float %95)
%98 = call float @llvm.maxnum.f32(float %88, float %94)
%99 = call float @llvm.maxnum.f32(float %90, float %96)
%100 = call float @llvm.maxnum.f32(float undef, float %97)
%101 = call float @llvm.maxnum.f32(float %100, float undef)
%102 = call float @llvm.minnum.f32(float %98, float undef)
%103 = call float @llvm.minnum.f32(float %102, float %99)
%104 = fsub float %30, undef
%105 = fsub float %31, undef
%96 = call float @llvm.minnum.f32(float %88, float %94)
%97 = call float @llvm.maxnum.f32(float %87, float %93)
%98 = call float @llvm.maxnum.f32(float %89, float %95)
%99 = call float @llvm.maxnum.f32(float undef, float %96)
%100 = call float @llvm.maxnum.f32(float %99, float undef)
%101 = call float @llvm.minnum.f32(float %97, float undef)
%102 = call float @llvm.minnum.f32(float %101, float %98)
%103 = fsub float %30, undef
%104 = fsub float %31, undef
%105 = fmul float %103, 0.000000e+00
%106 = fmul float %104, 0.000000e+00
%107 = fmul float %105, 0.000000e+00
%108 = call float @llvm.minnum.f32(float undef, float %106)
%107 = call float @llvm.minnum.f32(float undef, float %105)
%108 = call float @llvm.maxnum.f32(float undef, float %106)
%109 = call float @llvm.maxnum.f32(float undef, float %107)
%110 = call float @llvm.maxnum.f32(float undef, float %108)
%111 = call float @llvm.maxnum.f32(float %110, float undef)
%112 = call float @llvm.minnum.f32(float undef, float %109)
%113 = fsub float %32, undef
%114 = fsub float %33, undef
%115 = fsub float %34, undef
%116 = fmul float %113, 0.000000e+00
%110 = call float @llvm.maxnum.f32(float %109, float undef)
%111 = call float @llvm.minnum.f32(float undef, float %108)
%112 = fsub float %32, undef
%113 = fsub float %33, undef
%114 = fsub float %34, undef
%115 = fmul float %112, 0.000000e+00
%116 = fmul float %113, undef
%117 = fmul float %114, undef
%118 = fmul float %115, undef
%119 = fsub float %35, undef
%120 = fsub float %36, undef
%121 = fsub float %37, undef
%118 = fsub float %35, undef
%119 = fsub float %36, undef
%120 = fsub float %37, undef
%121 = fmul float %118, undef
%122 = fmul float %119, undef
%123 = fmul float %120, undef
%124 = fmul float %121, undef
%124 = call float @llvm.minnum.f32(float %115, float %121)
%125 = call float @llvm.minnum.f32(float %116, float %122)
%126 = call float @llvm.minnum.f32(float %117, float %123)
%127 = call float @llvm.minnum.f32(float %118, float %124)
%128 = call float @llvm.maxnum.f32(float %125, float %126)
%129 = call float @llvm.maxnum.f32(float %128, float %127)
%130 = fsub float %38, undef
%131 = fsub float %39, undef
%132 = fsub float %40, undef
%133 = fmul float %130, 0.000000e+00
%127 = call float @llvm.maxnum.f32(float %124, float %125)
%128 = call float @llvm.maxnum.f32(float %127, float %126)
%129 = fsub float %38, undef
%130 = fsub float %39, undef
%131 = fsub float %40, undef
%132 = fmul float %129, 0.000000e+00
%133 = fmul float %130, undef
%134 = fmul float %131, undef
%135 = fmul float %132, undef
%136 = fsub float %41, undef
%137 = fsub float %42, undef
%138 = fsub float %43, undef
%135 = fsub float %41, undef
%136 = fsub float %42, undef
%137 = fsub float %43, undef
%138 = fmul float %135, undef
%139 = fmul float %136, undef
%140 = fmul float %137, undef
%141 = fmul float %138, undef
%141 = call float @llvm.minnum.f32(float %132, float %138)
%142 = call float @llvm.minnum.f32(float %133, float %139)
%143 = call float @llvm.minnum.f32(float %134, float %140)
%144 = call float @llvm.minnum.f32(float %135, float %141)
%145 = call float @llvm.maxnum.f32(float %142, float %143)
%146 = call float @llvm.maxnum.f32(float %145, float %144)
%147 = fsub float %44, undef
%148 = fsub float %45, undef
%149 = fsub float %46, undef
%144 = call float @llvm.maxnum.f32(float %141, float %142)
%145 = call float @llvm.maxnum.f32(float %144, float %143)
%146 = fsub float %44, undef
%147 = fsub float %45, undef
%148 = fsub float %46, undef
%149 = fmul float %146, 0.000000e+00
%150 = fmul float %147, 0.000000e+00
%151 = fmul float %148, 0.000000e+00
%152 = fmul float %149, undef
%153 = fsub float %47, undef
%154 = fsub float %48, undef
%155 = fsub float %49, undef
%156 = fmul float %153, undef
%157 = fmul float %154, 0.000000e+00
%158 = fmul float %155, undef
%151 = fmul float %148, undef
%152 = fsub float %47, undef
%153 = fsub float %48, undef
%154 = fsub float %49, undef
%155 = fmul float %152, undef
%156 = fmul float %153, 0.000000e+00
%157 = fmul float %154, undef
%158 = call float @llvm.minnum.f32(float %149, float %155)
%159 = call float @llvm.minnum.f32(float %150, float %156)
%160 = call float @llvm.minnum.f32(float %151, float %157)
%161 = call float @llvm.minnum.f32(float %152, float %158)
%162 = call float @llvm.maxnum.f32(float %159, float %160)
%163 = call float @llvm.maxnum.f32(float %162, float %161)
%164 = fsub float %50, undef
%165 = fsub float %51, undef
%166 = fsub float %52, undef
%167 = fmul float %164, undef
%161 = call float @llvm.maxnum.f32(float %158, float %159)
%162 = call float @llvm.maxnum.f32(float %161, float %160)
%163 = fsub float %50, undef
%164 = fsub float %51, undef
%165 = fsub float %52, undef
%166 = fmul float %163, undef
%167 = fmul float %164, 0.000000e+00
%168 = fmul float %165, 0.000000e+00
%169 = fmul float %166, 0.000000e+00
%170 = fsub float %53, undef
%171 = fsub float %54, undef
%172 = fsub float %55, undef
%173 = fdiv float 1.000000e+00, %temp18.0
%169 = fsub float %53, undef
%170 = fsub float %54, undef
%171 = fsub float %55, undef
%172 = fdiv float 1.000000e+00, %temp18.0
%173 = fmul float %169, undef
%174 = fmul float %170, undef
%175 = fmul float %171, undef
%176 = fmul float %172, %173
%175 = fmul float %171, %172
%176 = call float @llvm.minnum.f32(float %166, float %173)
%177 = call float @llvm.minnum.f32(float %167, float %174)
%178 = call float @llvm.minnum.f32(float %168, float %175)
%179 = call float @llvm.minnum.f32(float %169, float %176)
%180 = call float @llvm.maxnum.f32(float %177, float %178)
%181 = call float @llvm.maxnum.f32(float %180, float %179)
%182 = fsub float %62, undef
%183 = fsub float %63, undef
%184 = fsub float %64, undef
%185 = fmul float %182, 0.000000e+00
%179 = call float @llvm.maxnum.f32(float %176, float %177)
%180 = call float @llvm.maxnum.f32(float %179, float %178)
%181 = fsub float %62, undef
%182 = fsub float %63, undef
%183 = fsub float %64, undef
%184 = fmul float %181, 0.000000e+00
%185 = fmul float %182, undef
%186 = fmul float %183, undef
%187 = fmul float %184, undef
%188 = fsub float %65, undef
%189 = fsub float %66, undef
%187 = fsub float %65, undef
%188 = fsub float %66, undef
%189 = fmul float %187, undef
%190 = fmul float %188, undef
%191 = fmul float %189, undef
%191 = call float @llvm.maxnum.f32(float %184, float %189)
%192 = call float @llvm.maxnum.f32(float %185, float %190)
%193 = call float @llvm.maxnum.f32(float %186, float %191)
%194 = call float @llvm.maxnum.f32(float %187, float undef)
%195 = call float @llvm.minnum.f32(float %192, float %193)
%196 = call float @llvm.minnum.f32(float %195, float %194)
%.temp292.7 = select i1 undef, float %163, float undef
%temp292.9 = select i1 false, float %181, float %.temp292.7
%193 = call float @llvm.maxnum.f32(float %186, float undef)
%194 = call float @llvm.minnum.f32(float %191, float %192)
%195 = call float @llvm.minnum.f32(float %194, float %193)
%.temp292.7 = select i1 undef, float %162, float undef
%temp292.9 = select i1 false, float %180, float %.temp292.7
%.temp292.9 = select i1 undef, float undef, float %temp292.9
%197 = fcmp ogt float undef, 0.000000e+00
%198 = fcmp olt float undef, %196
%199 = and i1 %197, %198
%200 = fcmp olt float undef, %.temp292.9
%201 = and i1 %199, %200
%temp292.11 = select i1 %201, float undef, float %.temp292.9
%196 = fcmp ogt float undef, 0.000000e+00
%197 = fcmp olt float undef, %195
%198 = and i1 %196, %197
%199 = fcmp olt float undef, %.temp292.9
%200 = and i1 %198, %199
%temp292.11 = select i1 %200, float undef, float %.temp292.9
br i1 undef, label %IF2565, label %ELSE2566
IF2565: ; preds = %ENDIF
br i1 false, label %ENDIF2582, label %ELSE2584
ELSE2566: ; preds = %ENDIF
%202 = fcmp oeq float %temp292.11, 1.000000e+04
br i1 %202, label %ENDLOOP, label %ELSE2593
%201 = fcmp oeq float %temp292.11, 1.000000e+04
br i1 %201, label %ENDLOOP, label %ELSE2593
ENDIF2564: ; preds = %ENDIF2594, %ENDIF2588
%temp894.1 = phi float [ undef, %ENDIF2588 ], [ %temp894.2, %ENDIF2594 ]
%temp18.1 = phi float [ %219, %ENDIF2588 ], [ undef, %ENDIF2594 ]
%203 = fsub float %5, undef
%204 = fmul float %203, undef
%205 = call float @llvm.maxnum.f32(float undef, float %204)
%temp18.1 = phi float [ %218, %ENDIF2588 ], [ undef, %ENDIF2594 ]
%202 = fsub float %5, undef
%203 = fmul float %202, undef
%204 = call float @llvm.maxnum.f32(float undef, float %203)
%205 = call float @llvm.minnum.f32(float %204, float undef)
%206 = call float @llvm.minnum.f32(float %205, float undef)
%207 = call float @llvm.minnum.f32(float %206, float undef)
%208 = fcmp ogt float undef, 0.000000e+00
%209 = fcmp olt float undef, 1.000000e+00
%210 = and i1 %208, %209
%211 = fcmp olt float undef, %207
%212 = and i1 %210, %211
br i1 %212, label %ENDIF2795, label %ELSE2797
%207 = fcmp ogt float undef, 0.000000e+00
%208 = fcmp olt float undef, 1.000000e+00
%209 = and i1 %207, %208
%210 = fcmp olt float undef, %206
%211 = and i1 %209, %210
br i1 %211, label %ENDIF2795, label %ELSE2797
ELSE2584: ; preds = %IF2565
br label %ENDIF2582
ENDIF2582: ; preds = %ELSE2584, %IF2565
%213 = fadd float %1, undef
%214 = fadd float 0.000000e+00, %213
%floor = call float @llvm.floor.f32(float %214)
%215 = fsub float %214, %floor
%212 = fadd float %1, undef
%213 = fadd float 0.000000e+00, %212
%floor = call float @llvm.floor.f32(float %213)
%214 = fsub float %213, %floor
br i1 undef, label %IF2589, label %ELSE2590
IF2589: ; preds = %ENDIF2582
@ -267,61 +269,61 @@ ELSE2590: ; preds = %ENDIF2582
br label %ENDIF2588
ENDIF2588: ; preds = %ELSE2590, %IF2589
%216 = fsub float 1.000000e+00, %215
%217 = call float @llvm.sqrt.f32(float %216)
%218 = fmul float %217, undef
%219 = fadd float %218, undef
%215 = fsub float 1.000000e+00, %214
%216 = call float @llvm.sqrt.f32(float %215)
%217 = fmul float %216, undef
%218 = fadd float %217, undef
br label %ENDIF2564
ELSE2593: ; preds = %ELSE2566
%220 = fcmp oeq float %temp292.11, %82
%221 = fcmp olt float %82, %84
%222 = and i1 %220, %221
br i1 %222, label %ENDIF2594, label %ELSE2596
%219 = fcmp oeq float %temp292.11, %81
%220 = fcmp olt float %81, %83
%221 = and i1 %219, %220
br i1 %221, label %ENDIF2594, label %ELSE2596
ELSE2596: ; preds = %ELSE2593
%223 = fcmp oeq float %temp292.11, %101
%224 = fcmp olt float %101, %103
%225 = and i1 %223, %224
br i1 %225, label %ENDIF2594, label %ELSE2632
%222 = fcmp oeq float %temp292.11, %100
%223 = fcmp olt float %100, %102
%224 = and i1 %222, %223
br i1 %224, label %ENDIF2594, label %ELSE2632
ENDIF2594: ; preds = %ELSE2788, %ELSE2785, %ELSE2782, %ELSE2779, %IF2775, %ELSE2761, %ELSE2758, %IF2757, %ELSE2704, %ELSE2686, %ELSE2671, %ELSE2668, %IF2667, %ELSE2632, %ELSE2596, %ELSE2593
%temp894.2 = phi float [ 0.000000e+00, %IF2667 ], [ 0.000000e+00, %ELSE2671 ], [ 0.000000e+00, %IF2757 ], [ 0.000000e+00, %ELSE2761 ], [ %temp894.0, %ELSE2758 ], [ 0.000000e+00, %IF2775 ], [ 0.000000e+00, %ELSE2779 ], [ 0.000000e+00, %ELSE2782 ], [ %.2848, %ELSE2788 ], [ 0.000000e+00, %ELSE2785 ], [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2632 ], [ 0.000000e+00, %ELSE2704 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ]
%226 = fmul float %temp894.2, undef
%225 = fmul float %temp894.2, undef
br label %ENDIF2564
ELSE2632: ; preds = %ELSE2596
br i1 undef, label %ENDIF2594, label %ELSE2650
ELSE2650: ; preds = %ELSE2632
%227 = fcmp oeq float %temp292.11, %111
%228 = fcmp olt float %111, %112
%229 = and i1 %227, %228
br i1 %229, label %IF2667, label %ELSE2668
%226 = fcmp oeq float %temp292.11, %110
%227 = fcmp olt float %110, %111
%228 = and i1 %226, %227
br i1 %228, label %IF2667, label %ELSE2668
IF2667: ; preds = %ELSE2650
br i1 undef, label %ENDIF2594, label %ELSE2671
ELSE2668: ; preds = %ELSE2650
%230 = fcmp oeq float %temp292.11, %129
%231 = fcmp olt float %129, undef
%232 = and i1 %230, %231
br i1 %232, label %ENDIF2594, label %ELSE2686
%229 = fcmp oeq float %temp292.11, %128
%230 = fcmp olt float %128, undef
%231 = and i1 %229, %230
br i1 %231, label %ENDIF2594, label %ELSE2686
ELSE2671: ; preds = %IF2667
br label %ENDIF2594
ELSE2686: ; preds = %ELSE2668
%233 = fcmp oeq float %temp292.11, %146
%234 = fcmp olt float %146, undef
%235 = and i1 %233, %234
br i1 %235, label %ENDIF2594, label %ELSE2704
%232 = fcmp oeq float %temp292.11, %145
%233 = fcmp olt float %145, undef
%234 = and i1 %232, %233
br i1 %234, label %ENDIF2594, label %ELSE2704
ELSE2704: ; preds = %ELSE2686
%236 = fcmp oeq float %temp292.11, %181
%237 = fcmp olt float %181, undef
%238 = and i1 %236, %237
br i1 %238, label %ENDIF2594, label %ELSE2740
%235 = fcmp oeq float %temp292.11, %180
%236 = fcmp olt float %180, undef
%237 = and i1 %235, %236
br i1 %237, label %ENDIF2594, label %ELSE2740
ELSE2740: ; preds = %ELSE2704
br i1 undef, label %IF2757, label %ELSE2758
@ -336,8 +338,8 @@ ELSE2761: ; preds = %IF2757
br label %ENDIF2594
IF2775: ; preds = %ELSE2758
%239 = fcmp olt float undef, undef
br i1 %239, label %ENDIF2594, label %ELSE2779
%238 = fcmp olt float undef, undef
br i1 %238, label %ENDIF2594, label %ELSE2779
ELSE2779: ; preds = %IF2775
br i1 undef, label %ENDIF2594, label %ELSE2782
@ -346,39 +348,39 @@ ELSE2782: ; preds = %ELSE2779
br i1 undef, label %ENDIF2594, label %ELSE2785
ELSE2785: ; preds = %ELSE2782
%240 = fcmp olt float undef, 0.000000e+00
br i1 %240, label %ENDIF2594, label %ELSE2788
%239 = fcmp olt float undef, 0.000000e+00
br i1 %239, label %ENDIF2594, label %ELSE2788
ELSE2788: ; preds = %ELSE2785
%241 = fcmp olt float 0.000000e+00, undef
%.2848 = select i1 %241, float -1.000000e+00, float 1.000000e+00
%240 = fcmp olt float 0.000000e+00, undef
%.2848 = select i1 %240, float -1.000000e+00, float 1.000000e+00
br label %ENDIF2594
ELSE2797: ; preds = %ENDIF2564
%242 = fsub float %8, undef
%243 = fsub float %9, undef
%244 = fsub float %10, undef
%241 = fsub float %8, undef
%242 = fsub float %9, undef
%243 = fsub float %10, undef
%244 = fmul float %241, undef
%245 = fmul float %242, undef
%246 = fmul float %243, undef
%247 = fmul float %244, undef
%248 = fsub float %11, undef
%249 = fsub float %12, undef
%250 = fsub float %13, undef
%247 = fsub float %11, undef
%248 = fsub float %12, undef
%249 = fsub float %13, undef
%250 = fmul float %247, undef
%251 = fmul float %248, undef
%252 = fmul float %249, undef
%253 = fmul float %250, undef
%253 = call float @llvm.minnum.f32(float %244, float %250)
%254 = call float @llvm.minnum.f32(float %245, float %251)
%255 = call float @llvm.minnum.f32(float %246, float %252)
%256 = call float @llvm.maxnum.f32(float %247, float %253)
%257 = call float @llvm.maxnum.f32(float %254, float %255)
%258 = call float @llvm.maxnum.f32(float %257, float undef)
%259 = call float @llvm.minnum.f32(float undef, float %256)
%260 = fcmp ogt float %258, 0.000000e+00
%261 = fcmp olt float %258, 1.000000e+00
%262 = and i1 %260, %261
%263 = fcmp olt float %258, %259
%264 = and i1 %262, %263
br i1 %264, label %ENDIF2795, label %ELSE2800
%255 = call float @llvm.maxnum.f32(float %246, float %252)
%256 = call float @llvm.maxnum.f32(float %253, float %254)
%257 = call float @llvm.maxnum.f32(float %256, float undef)
%258 = call float @llvm.minnum.f32(float undef, float %255)
%259 = fcmp ogt float %257, 0.000000e+00
%260 = fcmp olt float %257, 1.000000e+00
%261 = and i1 %259, %260
%262 = fcmp olt float %257, %258
%263 = and i1 %261, %262
br i1 %263, label %ENDIF2795, label %ELSE2800
ENDIF2795: ; preds = %ELSE2824, %ELSE2821, %ELSE2818, %ELSE2815, %ELSE2812, %ELSE2809, %ELSE2806, %ELSE2803, %ELSE2800, %ELSE2797, %ENDIF2564
br label %LOOP
@ -387,53 +389,53 @@ ELSE2800: ; preds = %ELSE2797
br i1 undef, label %ENDIF2795, label %ELSE2803
ELSE2803: ; preds = %ELSE2800
%265 = fsub float %20, undef
%266 = fsub float %21, undef
%267 = fsub float %22, undef
%264 = fsub float %20, undef
%265 = fsub float %21, undef
%266 = fsub float %22, undef
%267 = fmul float %264, undef
%268 = fmul float %265, undef
%269 = fmul float %266, undef
%270 = fmul float %267, 0.000000e+00
%271 = fsub float %23, undef
%272 = fsub float %24, undef
%273 = fsub float %25, undef
%269 = fmul float %266, 0.000000e+00
%270 = fsub float %23, undef
%271 = fsub float %24, undef
%272 = fsub float %25, undef
%273 = fmul float %270, undef
%274 = fmul float %271, undef
%275 = fmul float %272, undef
%276 = fmul float %273, undef
%277 = call float @llvm.minnum.f32(float %268, float %274)
%276 = call float @llvm.minnum.f32(float %267, float %273)
%277 = call float @llvm.maxnum.f32(float %268, float %274)
%278 = call float @llvm.maxnum.f32(float %269, float %275)
%279 = call float @llvm.maxnum.f32(float %270, float %276)
%280 = call float @llvm.maxnum.f32(float %277, float undef)
%281 = call float @llvm.maxnum.f32(float %280, float undef)
%282 = call float @llvm.minnum.f32(float undef, float %278)
%283 = call float @llvm.minnum.f32(float %282, float %279)
%284 = fcmp ogt float %281, 0.000000e+00
%285 = fcmp olt float %281, 1.000000e+00
%286 = and i1 %284, %285
%287 = fcmp olt float %281, %283
%288 = and i1 %286, %287
br i1 %288, label %ENDIF2795, label %ELSE2806
%279 = call float @llvm.maxnum.f32(float %276, float undef)
%280 = call float @llvm.maxnum.f32(float %279, float undef)
%281 = call float @llvm.minnum.f32(float undef, float %277)
%282 = call float @llvm.minnum.f32(float %281, float %278)
%283 = fcmp ogt float %280, 0.000000e+00
%284 = fcmp olt float %280, 1.000000e+00
%285 = and i1 %283, %284
%286 = fcmp olt float %280, %282
%287 = and i1 %285, %286
br i1 %287, label %ENDIF2795, label %ELSE2806
ELSE2806: ; preds = %ELSE2803
%289 = fsub float %26, undef
%290 = fsub float %27, undef
%291 = fsub float %28, undef
%292 = fmul float %289, undef
%293 = fmul float %290, 0.000000e+00
%294 = fmul float %291, undef
%295 = fsub float %29, undef
%296 = fmul float %295, undef
%297 = call float @llvm.minnum.f32(float %292, float %296)
%298 = call float @llvm.minnum.f32(float %293, float undef)
%299 = call float @llvm.maxnum.f32(float %294, float undef)
%300 = call float @llvm.maxnum.f32(float %297, float %298)
%301 = call float @llvm.maxnum.f32(float %300, float undef)
%302 = call float @llvm.minnum.f32(float undef, float %299)
%303 = fcmp ogt float %301, 0.000000e+00
%304 = fcmp olt float %301, 1.000000e+00
%305 = and i1 %303, %304
%306 = fcmp olt float %301, %302
%307 = and i1 %305, %306
br i1 %307, label %ENDIF2795, label %ELSE2809
%288 = fsub float %26, undef
%289 = fsub float %27, undef
%290 = fsub float %28, undef
%291 = fmul float %288, undef
%292 = fmul float %289, 0.000000e+00
%293 = fmul float %290, undef
%294 = fsub float %29, undef
%295 = fmul float %294, undef
%296 = call float @llvm.minnum.f32(float %291, float %295)
%297 = call float @llvm.minnum.f32(float %292, float undef)
%298 = call float @llvm.maxnum.f32(float %293, float undef)
%299 = call float @llvm.maxnum.f32(float %296, float %297)
%300 = call float @llvm.maxnum.f32(float %299, float undef)
%301 = call float @llvm.minnum.f32(float undef, float %298)
%302 = fcmp ogt float %300, 0.000000e+00
%303 = fcmp olt float %300, 1.000000e+00
%304 = and i1 %302, %303
%305 = fcmp olt float %300, %301
%306 = and i1 %304, %305
br i1 %306, label %ENDIF2795, label %ELSE2809
ELSE2809: ; preds = %ELSE2806
br i1 undef, label %ENDIF2795, label %ELSE2812
@ -448,29 +450,29 @@ ELSE2818: ; preds = %ELSE2815
br i1 undef, label %ENDIF2795, label %ELSE2821
ELSE2821: ; preds = %ELSE2818
%308 = fsub float %56, undef
%309 = fsub float %57, undef
%310 = fsub float %58, undef
%311 = fmul float %308, undef
%312 = fmul float %309, 0.000000e+00
%313 = fmul float %310, undef
%314 = fsub float %59, undef
%315 = fsub float %60, undef
%316 = fsub float %61, undef
%307 = fsub float %56, undef
%308 = fsub float %57, undef
%309 = fsub float %58, undef
%310 = fmul float %307, undef
%311 = fmul float %308, 0.000000e+00
%312 = fmul float %309, undef
%313 = fsub float %59, undef
%314 = fsub float %60, undef
%315 = fsub float %61, undef
%316 = fmul float %313, undef
%317 = fmul float %314, undef
%318 = fmul float %315, undef
%319 = fmul float %316, undef
%319 = call float @llvm.maxnum.f32(float %310, float %316)
%320 = call float @llvm.maxnum.f32(float %311, float %317)
%321 = call float @llvm.maxnum.f32(float %312, float %318)
%322 = call float @llvm.maxnum.f32(float %313, float %319)
%323 = call float @llvm.minnum.f32(float %320, float %321)
%324 = call float @llvm.minnum.f32(float %323, float %322)
%325 = fcmp ogt float undef, 0.000000e+00
%326 = fcmp olt float undef, 1.000000e+00
%327 = and i1 %325, %326
%328 = fcmp olt float undef, %324
%329 = and i1 %327, %328
br i1 %329, label %ENDIF2795, label %ELSE2824
%322 = call float @llvm.minnum.f32(float %319, float %320)
%323 = call float @llvm.minnum.f32(float %322, float %321)
%324 = fcmp ogt float undef, 0.000000e+00
%325 = fcmp olt float undef, 1.000000e+00
%326 = and i1 %324, %325
%327 = fcmp olt float undef, %323
%328 = and i1 %326, %327
br i1 %328, label %ENDIF2795, label %ELSE2824
ELSE2824: ; preds = %ELSE2821
%.2849 = select i1 undef, float 0.000000e+00, float 1.000000e+00
@ -478,25 +480,22 @@ ELSE2824: ; preds = %ELSE2821
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
declare float @llvm.SI.load.const(<16 x i8>, i32) #2
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
declare float @llvm.floor.f32(float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
declare float @llvm.sqrt.f32(float) #2
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.minnum.f32(float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
declare float @llvm.maxnum.f32(float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #0 = { alwaysinline nounwind readnone }
attributes #1 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #2 = { nounwind readnone }