1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

AMDGPU: Use source modifiers with f16->f32 conversions

The operand types were defined to fit the fp16_to_fp node, which
has the half as an integer type. v_cvt_f32_f16 does support
source modifiers, so change this to have an FP type and modifiers.

For targets without legal f16, this requires recognizing the
bit operations and trying to produce them.

llvm-svn: 293857
This commit is contained in:
Matt Arsenault 2017-02-02 02:27:04 +00:00
parent 0d46e1f86e
commit 93d42a5136
16 changed files with 463 additions and 116 deletions

View File

@ -479,6 +479,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FNEG);
setTargetDAGCombine(ISD::FABS);
}
//===----------------------------------------------------------------------===//
@ -2968,6 +2969,45 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1));
}
case ISD::FP16_TO_FP: {
// v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal
// f16, but legalization of f16 fneg ends up pulling it out of the source.
// Put the fneg back as a legal source operation that can be matched later.
SDLoc SL(N);
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
// fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000)
SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src,
DAG.getConstant(0x8000, SL, SrcVT));
return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg);
}
default:
return SDValue();
}
}
SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
if (!N0.hasOneUse())
return SDValue();
switch (N0.getOpcode()) {
case ISD::FP16_TO_FP: {
assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal");
SDLoc SL(N);
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
// fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff)
SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src,
DAG.getConstant(0x7fff, SL, SrcVT));
return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs);
}
default:
return SDValue();
}
@ -3080,6 +3120,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performSelectCombine(N, DCI);
case ISD::FNEG:
return performFNegCombine(N, DCI);
case ISD::FABS:
return performFAbsCombine(N, DCI);
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
assert(!N->getValueType(0).isVector() &&

View File

@ -85,6 +85,7 @@ protected:
SDValue RHS, DAGCombinerInfo &DCI) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);

View File

@ -637,6 +637,9 @@ def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]
def sub_oneuse : HasOneUseBinOp<sub>;

View File

@ -226,6 +226,8 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case 8:
return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
case 2:
// FIXME Is this correct? What do inline immediates do on SI for f16 src
// which does not have f16 support?
return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
default:
llvm_unreachable("invalid operand size");

View File

@ -1824,7 +1824,8 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
case 16:
return AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
return ST.has16BitInsts() &&
AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
default:
llvm_unreachable("invalid bitwidth");
@ -1854,8 +1855,13 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
}
case 16: {
if (isInt<16>(Imm) || isUInt<16>(Imm)) {
// A few special case instructions have 16-bit operands on subtargets
// where 16-bit instructions are not legal.
// TODO: Do the 32-bit immediates work? We shouldn't really need to handle
// constants in these cases
int16_t Trunc = static_cast<int16_t>(Imm);
return AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
return ST.has16BitInsts() &&
AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
}
return false;

View File

@ -619,6 +619,8 @@ def SIOperand {
def SRCMODS {
int NONE = 0;
int NEG = 1;
int ABS = 2;
int NEG_ABS = 3;
}
def DSTCLAMP {
@ -1132,6 +1134,8 @@ def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;

View File

@ -430,9 +430,26 @@ def : Pat <
} // End Predicates = [UnsafeFPMath]
// f16_to_fp patterns
def : Pat <
(f32 (fpextend f16:$src)),
(V_CVT_F32_F16_e32 $src)
(f32 (f16_to_fp i32:$src0)),
(V_CVT_F32_F16_e64 SRCMODS.NONE, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
def : Pat <
(f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))),
(V_CVT_F32_F16_e64 SRCMODS.ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
def : Pat <
(f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))),
(V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
def : Pat <
(f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))),
(V_CVT_F32_F16_e64 SRCMODS.NEG, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
def : Pat <
@ -440,9 +457,10 @@ def : Pat <
(V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src))
>;
// fp_to_fp16 patterns
def : Pat <
(f16 (fpround f32:$src)),
(V_CVT_F16_F32_e32 $src)
(i32 (fp_to_f16 (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)))),
(V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, $clamp, $omod)
>;
def : Pat <

View File

@ -147,8 +147,8 @@ defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>;
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>;
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;

View File

@ -19,8 +19,7 @@ define void @fabs_free_f16(half addrspace(1)* %out, i16 %in) {
; GCN-LABEL: {{^}}fabs_f16:
; CI: flat_load_ushort [[VAL:v[0-9]+]],
; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[VAL]]
; CI: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], |[[CVT0]]|
; CI: v_and_b32_e32 [[CVT0:v[0-9]+]], 0x7fff, [[VAL]]
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @fabs_f16(half addrspace(1)* %out, half %in) {
%fabs = call half @llvm.fabs.f16(half %in)
@ -30,10 +29,10 @@ define void @fabs_f16(half addrspace(1)* %out, half %in) {
; FIXME: Should be able to use single and
; GCN-LABEL: {{^}}fabs_v2f16:
; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
; CI: s_movk_i32 [[MASK:s[0-9]+]], 0x7fff
; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; VI: flat_load_ushort [[LO:v[0-9]+]]
; VI: flat_load_ushort [[HI:v[0-9]+]]
@ -51,10 +50,11 @@ define void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
}
; GCN-LABEL: {{^}}fabs_v4f16:
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|
; CI: s_movk_i32 [[MASK:s[0-9]+]], 0x7fff
; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; CI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
; VI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7fff{{$}}
; VI: v_and_b32_e32 v{{[0-9]+}}, [[MASK]], v{{[0-9]+}}
@ -72,9 +72,10 @@ define void @fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
; GCN-LABEL: {{^}}fabs_fold_f16:
; GCN: flat_load_ushort [[IN0:v[0-9]+]]
; GCN: flat_load_ushort [[IN1:v[0-9]+]]
; CI-DAG: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[IN0]]
; CI-DAG: v_cvt_f32_f16_e32 [[CVT1:v[0-9]+]], [[IN1]]
; CI: v_mul_f32_e64 [[RESULT:v[0-9]+]], |[[CVT1]]|, [[CVT0]]
; CI-DAG: v_cvt_f32_f16_e64 [[ABS_CVT1:v[0-9]+]], |[[IN1]]|
; CI: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[CVT0]], [[ABS_CVT1]]
; CI: v_cvt_f16_f32_e32 [[CVTRESULT:v[0-9]+]], [[RESULT]]
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVTRESULT]]

View File

@ -28,10 +28,10 @@ entry:
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_cvt_f32_f16_e64 v[[A_F32:[0-9]+]], |v[[A_F16]]|
; SI: v_cvt_f32_f16_e64 v[[B_F32:[0-9]+]], |v[[B_F16]]|
; SI: v_cmp_lt_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F32]]|, |v[[B_F32]]|
; SI: v_cmp_lt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
; VI: v_cmp_lt_f16_e64 s{{\[[0-9]+:[0-9]+\]}}, |v[[A_F16]]|, |v[[B_F16]]|
; GCN: v_cndmask_b32_e64 v[[R_I32:[0-9]+]]

View File

@ -3,8 +3,8 @@
; GCN-LABEL: {{^}}fneg_fabs_fadd_f16:
; CI: v_cvt_f32_f16_e32
; CI: v_cvt_f32_f16_e32
; CI: v_sub_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}|
; CI: v_cvt_f32_f16_e64 [[CVT_ABS_X:v[0-9]+]], |v{{[0-9]+}}|
; CI: v_subrev_f32_e32 v{{[0-9]+}}, [[CVT_ABS_X]], v{{[0-9]+}}
; VI-NOT: and
; VI: v_sub_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|
@ -17,14 +17,15 @@ define void @fneg_fabs_fadd_f16(half addrspace(1)* %out, half %x, half %y) {
}
; GCN-LABEL: {{^}}fneg_fabs_fmul_f16:
; CI: v_cvt_f32_f16_e32
; CI: v_cvt_f32_f16_e32
; CI: v_mul_f32_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}|
; CI-DAG: v_cvt_f32_f16_e32
; CI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG_ABS_X:v[0-9]+]], -|{{v[0-9]+}}|
; CI: v_mul_f32_e32 {{v[0-9]+}}, [[CVT_NEG_ABS_X]], {{v[0-9]+}}
; CI: v_cvt_f16_f32_e32
; VI-NOT: and
; VI: v_mul_f16_e64 {{v[0-9]+}}, {{v[0-9]+}}, -|{{v[0-9]+}}|
; VI-NOT: and
; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], {{v[0-9]+}}, -|{{v[0-9]+}}|
; VI-NOT: [[MUL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[MUL]]
define void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) {
%fabs = call half @llvm.fabs.f16(half %x)
%fsub = fsub half -0.000000e+00, %fabs
@ -49,10 +50,7 @@ define void @fneg_fabs_free_f16(half addrspace(1)* %out, i16 %in) {
; FIXME: Should use or
; GCN-LABEL: {{^}}fneg_fabs_f16:
; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
; VI: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
define void @fneg_fabs_f16(half addrspace(1)* %out, half %in) {
%fabs = call half @llvm.fabs.f16(half %in)
%fsub = fsub half -0.000000e+00, %fabs
@ -61,10 +59,7 @@ define void @fneg_fabs_f16(half addrspace(1)* %out, half %in) {
}
; GCN-LABEL: {{^}}v_fneg_fabs_f16:
; CI: v_cvt_f32_f16_e32 v{{[0-9]+}},
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
; VI: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
; GCN: v_or_b32_e32 v{{[0-9]+}}, 0x8000, v{{[0-9]+}}
define void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
%val = load half, half addrspace(1)* %in, align 2
%fabs = call half @llvm.fabs.f16(half %val)
@ -75,13 +70,10 @@ define void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
; FIXME: single bit op
; GCN-LABEL: {{^}}fneg_fabs_v2f16:
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; VI: flat_store_dword
; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; GCN: store_dword
define void @fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
%fsub = fsub <2 x half> <half -0.000000e+00, half -0.000000e+00>, %fabs
@ -90,17 +82,12 @@ define void @fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
}
; GCN-LABEL: {{^}}fneg_fabs_v4f16:
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
; CI: v_cvt_f16_f32_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|
; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; VI: flat_store_dwordx2
; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}}
; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; GCN: v_or_b32_e32 v{{[0-9]+}}, [[MASK]],
; GCN: store_dwordx2
define void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
%fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
%fsub = fsub <4 x half> <half -0.000000e+00, half -0.000000e+00, half -0.000000e+00, half -0.000000e+00>, %fabs

View File

@ -15,13 +15,9 @@ define void @s_fneg_f16(half addrspace(1)* %out, half %in) {
; FUNC-LABEL: {{^}}v_fneg_f16:
; GCN: flat_load_ushort [[VAL:v[0-9]+]],
; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[VAL]]
; CI: v_cvt_f16_f32_e64 [[CVT1:v[0-9]+]], -[[CVT0]]
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVT1]]
; VI: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]]
; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[XOR]]
; SI: buffer_store_short [[XOR]]
define void @v_fneg_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
%val = load half, half addrspace(1)* %in, align 2
%fneg = fsub half -0.000000e+00, %val
@ -45,8 +41,9 @@ define void @fneg_free_f16(half addrspace(1)* %out, i16 %in) {
; FUNC-LABEL: {{^}}v_fneg_fold_f16:
; GCN: flat_load_ushort [[NEG_VALUE:v[0-9]+]]
; CI: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[CVT0]]
; CI: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[CVT0]], [[CVT0]]
; CI-DAG: v_cvt_f32_f16_e32 [[CVT_VAL:v[0-9]+]], [[NEG_VALUE]]
; CI-DAG: v_cvt_f32_f16_e64 [[NEG_CVT0:v[0-9]+]], -[[NEG_VALUE]]
; CI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[CVT_VAL]], [[NEG_CVT0]]
; CI: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], [[MUL]]
; CI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[CVT1]]

View File

@ -68,3 +68,202 @@ entry:
store <2 x double> %r.val, <2 x double> addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}s_fneg_fpext_f16_to_f32:
; GCN: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}
define void @s_fneg_fpext_f16_to_f32(float addrspace(1)* %r, i32 %a) {
entry:
%a.trunc = trunc i32 %a to i16
%a.val = bitcast i16 %a.trunc to half
%r.val = fpext half %a.val to float
store float %r.val, float addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fneg_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -[[A]]
define void @fneg_fpext_f16_to_f32(
float addrspace(1)* %r,
half addrspace(1)* %a) {
entry:
%a.val = load half, half addrspace(1)* %a
%a.neg = fsub half -0.0, %a.val
%r.val = fpext half %a.neg to float
store float %r.val, float addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fabs_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, |[[A]]|
define void @fabs_fpext_f16_to_f32(
float addrspace(1)* %r,
half addrspace(1)* %a) {
entry:
%a.val = load half, half addrspace(1)* %a
%a.fabs = call half @llvm.fabs.f16(half %a.val)
%r.val = fpext half %a.fabs to float
store float %r.val, float addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fneg_fabs_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN: v_cvt_f32_f16_e64 v{{[0-9]+}}, -|[[A]]|
define void @fneg_fabs_fpext_f16_to_f32(
float addrspace(1)* %r,
half addrspace(1)* %a) {
entry:
%a.val = load half, half addrspace(1)* %a
%a.fabs = call half @llvm.fabs.f16(half %a.val)
%a.fneg.fabs = fsub half -0.0, %a.fabs
%r.val = fpext half %a.fneg.fabs to float
store float %r.val, float addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fneg_multi_use_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[A]]
; FIXME: Using the source modifier here only wastes code size
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]]
; GCN: store_dword [[CVT]]
; GCN: store_short [[XOR]]
define void @fneg_multi_use_fpext_f16_to_f32(
float addrspace(1)* %r,
half addrspace(1)* %a) {
entry:
%a.val = load half, half addrspace(1)* %a
%a.neg = fsub half -0.0, %a.val
%r.val = fpext half %a.neg to float
store volatile float %r.val, float addrspace(1)* %r
store volatile half %a.neg, half addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}fneg_multi_foldable_use_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f32_f16_e64 [[CVTA_NEG:v[0-9]+]], -[[A]]
; SI-DAG: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
; SI: v_mul_f32_e32 [[MUL_F32:v[0-9]+]], [[CVTA]], [[CVTA_NEG]]
; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]
; VI-DAG: v_cvt_f32_f16_e64 [[CVT_NEGA:v[0-9]+]], -[[A]]
; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], -[[A]], [[A]]
; GCN: buffer_store_dword [[CVTA_NEG]]
; GCN: buffer_store_short [[MUL]]
define void @fneg_multi_foldable_use_fpext_f16_to_f32(
float addrspace(1)* %r,
half addrspace(1)* %a) {
entry:
%a.val = load half, half addrspace(1)* %a
%a.neg = fsub half -0.0, %a.val
%r.val = fpext half %a.neg to float
%mul = fmul half %a.neg, %a.val
store volatile float %r.val, float addrspace(1)* %r
store volatile half %mul, half addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}fabs_multi_use_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN-DAG: v_and_b32_e32 [[XOR:v[0-9]+]], 0x7fff, [[A]]
; SI-DAG: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[A]]
; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], |[[A]]|
; GCN: store_dword [[CVT]]
; GCN: store_short [[XOR]]
define void @fabs_multi_use_fpext_f16_to_f32(
float addrspace(1)* %r,
half addrspace(1)* %a) {
entry:
%a.val = load half, half addrspace(1)* %a
%a.fabs = call half @llvm.fabs.f16(half %a.val)
%r.val = fpext half %a.fabs to float
store volatile float %r.val, float addrspace(1)* %r
store volatile half %a.fabs, half addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}fabs_multi_foldable_use_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; SI: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
; SI: v_mul_f32_e64 [[MUL_F32:v[0-9]+]], |[[CVTA]]|, [[CVTA]]
; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]
; SI: v_and_b32_e32 [[ABS_A:v[0-9]+]], 0x7fffffff, [[CVTA]]
; VI-DAG: v_cvt_f32_f16_e64 [[ABS_A:v[0-9]+]], |[[A]]|
; VI: v_mul_f16_e64 [[MUL:v[0-9]+]], |[[A]]|, [[A]]
; GCN: buffer_store_dword [[ABS_A]]
; GCN: buffer_store_short [[MUL]]
define void @fabs_multi_foldable_use_fpext_f16_to_f32(
float addrspace(1)* %r,
half addrspace(1)* %a) {
entry:
%a.val = load half, half addrspace(1)* %a
%a.fabs = call half @llvm.fabs.f16(half %a.val)
%r.val = fpext half %a.fabs to float
%mul = fmul half %a.fabs, %a.val
store volatile float %r.val, float addrspace(1)* %r
store volatile half %mul, half addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}fabs_fneg_multi_use_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], 0x8000, [[A]]
; SI: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[OR]]
; VI-DAG: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[OR]]|
; GCN: buffer_store_dword [[CVT]]
; GCN: buffer_store_short [[OR]]
define void @fabs_fneg_multi_use_fpext_f16_to_f32(
float addrspace(1)* %r,
half addrspace(1)* %a) {
entry:
%a.val = load half, half addrspace(1)* %a
%a.fabs = call half @llvm.fabs.f16(half %a.val)
%a.fneg.fabs = fsub half -0.0, %a.fabs
%r.val = fpext half %a.fneg.fabs to float
store volatile float %r.val, float addrspace(1)* %r
store volatile half %a.fneg.fabs, half addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}fabs_fneg_multi_foldable_use_fpext_f16_to_f32:
; GCN: {{buffer|flat}}_load_ushort [[A:v[0-9]+]]
; SI: v_cvt_f32_f16_e32 [[CVTA:v[0-9]+]], [[A]]
; SI: v_mul_f32_e64 [[MUL_F32:v[0-9]+]], -|[[CVTA]]|, [[CVTA]]
; SI: v_cvt_f16_f32_e32 [[MUL:v[0-9]+]], [[MUL_F32]]
; SI: v_or_b32_e32 [[FABS_FNEG:v[0-9]+]], 0x80000000, [[CVTA]]
; VI-DAG: v_cvt_f32_f16_e64 [[FABS_FNEG:v[0-9]+]], -|[[A]]|
; VI-DAG: v_mul_f16_e64 [[MUL:v[0-9]+]], -|[[A]]|, [[A]]
; GCN: buffer_store_dword [[FABS_FNEG]]
; GCN: buffer_store_short [[MUL]]
define void @fabs_fneg_multi_foldable_use_fpext_f16_to_f32(
float addrspace(1)* %r,
half addrspace(1)* %a) {
entry:
%a.val = load half, half addrspace(1)* %a
%a.fabs = call half @llvm.fabs.f16(half %a.val)
%a.fneg.fabs = fsub half -0.0, %a.fabs
%r.val = fpext half %a.fneg.fabs to float
%mul = fmul half %a.fneg.fabs, %a.val
store volatile float %r.val, float addrspace(1)* %r
store volatile half %mul, half addrspace(1)* undef
ret void
}
declare half @llvm.fabs.f16(half) #1
attributes #1 = { nounwind readnone }

View File

@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}fptrunc_f32_to_f16
; GCN-LABEL: {{^}}fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
@ -16,7 +16,7 @@ entry:
ret void
}
; GCN-LABEL: {{^}}fptrunc_f64_to_f16
; GCN-LABEL: {{^}}fptrunc_f64_to_f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_1:[0-9]+]]{{\]}}
; GCN: v_cvt_f32_f64_e32 v[[A_F32:[0-9]+]], v{{\[}}[[A_F64_0]]:[[A_F64_1]]{{\]}}
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
@ -32,7 +32,7 @@ entry:
ret void
}
; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16
; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[A_F32_0:[0-9]+]]:[[A_F32_1:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
@ -51,7 +51,7 @@ entry:
ret void
}
; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16
; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16:
; GCN: buffer_load_dwordx4 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_3:[0-9]+]]{{\]}}
; GCN: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v{{\[}}[[A_F64_0]]:{{[0-9]+}}{{\]}}
; GCN: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v{{\[}}{{[0-9]+}}:[[A_F64_3]]{{\]}}
@ -70,3 +70,56 @@ entry:
store <2 x half> %r.val, <2 x half> addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fneg_fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fneg_fptrunc_f32_to_f16(
half addrspace(1)* %r,
float addrspace(1)* %a) {
entry:
%a.val = load float, float addrspace(1)* %a
%a.fneg = fsub float -0.0, %a.val
%r.val = fptrunc float %a.fneg to half
store half %r.val, half addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fabs_fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]|
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fabs_fptrunc_f32_to_f16(
half addrspace(1)* %r,
float addrspace(1)* %a) {
entry:
%a.val = load float, float addrspace(1)* %a
%a.fabs = call float @llvm.fabs.f32(float %a.val)
%r.val = fptrunc float %a.fabs to half
store half %r.val, half addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fneg_fabs_fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -|v[[A_F32]]|
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fneg_fabs_fptrunc_f32_to_f16(
half addrspace(1)* %r,
float addrspace(1)* %a) {
entry:
%a.val = load float, float addrspace(1)* %a
%a.fabs = call float @llvm.fabs.f32(float %a.val)
%a.fneg.fabs = fsub float -0.0, %a.fabs
%r.val = fptrunc float %a.fneg.fabs to half
store half %r.val, half addrspace(1)* %r
ret void
}
declare float @llvm.fabs.f32(float) #1
attributes #1 = { nounwind readnone }

View File

@ -31,9 +31,10 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_f16_same_add
; GCN-LABEL: {{^}}mac_f16_same_add:
; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; SI: v_mac_f32_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD:v[0-9]+]]
; VI: v_mac_f16_e32 [[ADD]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
@ -63,9 +64,11 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_f16_neg_a
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-LABEL: {{^}}mac_f16_neg_a:
; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}}
; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]]
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
@ -87,9 +90,10 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_f16_neg_b
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-LABEL: {{^}}mac_f16_neg_b:
; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}}
; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]]
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: s_endpgm
@ -111,9 +115,12 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_f16_neg_c
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-LABEL: {{^}}mac_f16_neg_c:
; SI: v_cvt_f32_f16_e32
; SI-DAG: v_cvt_f32_f16_e32
; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
; SI: v_mac_f32_e32 [[CVT_NEG]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN: s_endpgm
@ -207,9 +214,11 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; GCN-LABEL: {{^}}mac_f16_neg_a_unsafe_fp_math:
; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}}
; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]]
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; GCN: s_endpgm
@ -231,9 +240,11 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; GCN-LABEL: {{^}}mac_f16_neg_b_unsafe_fp_math:
; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
; SI-DAG: v_cvt_f32_f16_e32 [[CVT_OTHER:v[0-9]+]], v{{[0-9]+}}
; SI: v_mac_f32_e32 v{{[0-9]+}}, [[CVT_OTHER]], [[CVT_NEG]]
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]}}
; GCN: s_endpgm
@ -255,9 +266,12 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
; GCN-LABEL: {{^}}mac_f16_neg_c_unsafe_fp_math:
; SI: v_cvt_f32_f16_e32
; SI: v_cvt_f32_f16_e32
; SI-DAG: v_cvt_f32_f16_e64 [[CVT_NEG:v[0-9]+]], -v{{[0-9]+}}
; SI: v_mac_f32_e32 [[CVT_NEG]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]}}
; GCN: s_endpgm
@ -279,7 +293,7 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_v2f16
; GCN-LABEL: {{^}}mac_v2f16:
; GCN: {{buffer|flat}}_load_dword v[[A_V2_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_dword v[[B_V2_F16:[0-9]+]]
; GCN: {{buffer|flat}}_load_dword v[[C_V2_F16:[0-9]+]]
@ -322,7 +336,7 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_v2f16_same_add
; GCN-LABEL: {{^}}mac_v2f16_same_add:
; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD0:v[0-9]+]]
; SI: v_mad_f32 v{{[0-9]}}, v{{[0-9]+}}, v{{[0-9]+}}, [[ADD1:v[0-9]+]]
; SI: v_mac_f32_e32 [[ADD0]], v{{[0-9]+}}, v{{[0-9]+}}
@ -358,10 +372,13 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_v2f16_neg_a
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN-LABEL: {{^}}mac_v2f16_neg_a:
; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]]
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]]
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@ -385,9 +402,12 @@ entry:
}
; GCN-LABEL: {{^}}mac_v2f16_neg_b
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]]
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]]
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@ -410,10 +430,13 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_v2f16_neg_c
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; GCN-LABEL: {{^}}mac_v2f16_neg_c:
; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
; SI-DAG: v_mac_f32_e32 [[CVT_NEG0]], v{{[0-9]+}}, v{{[0-9]+}}
; SI-DAG: v_mac_f32_e32 [[CVT_NEG1]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}
@ -464,7 +487,7 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_v2f16_neg_b_safe_fp_math
; GCN-LABEL: {{^}}mac_v2f16_neg_b_safe_fp_math:
; SI: v_sub_f32_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_sub_f32_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_mac_f32_e32 v{{[0-9]+}}, v[[NEG_A0]], v{{[0-9]+}}
@ -492,7 +515,7 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_v2f16_neg_c_safe_fp_math
; GCN-LABEL: {{^}}mac_v2f16_neg_c_safe_fp_math:
; SI: v_sub_f32_e32 v[[NEG_A0:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_sub_f32_e32 v[[NEG_A1:[0-9]+]], 0, v{{[0-9]+}}
; SI: v_mac_f32_e32 v[[NEG_A0]], v{{[0-9]+}}, v{{[0-9]+}}
@ -520,10 +543,13 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; GCN-LABEL: {{^}}mac_v2f16_neg_a_unsafe_fp_math:
; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]]
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]]
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
@ -546,10 +572,13 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; SI: v_mad_f32 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; GCN-LABEL: {{^}}mac_v2f16_neg_b_unsafe_fp_math:
; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG0]]
; SI-DAG: v_mac_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[CVT_NEG1]]
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
; VI: v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, v{{[-0-9]}}
@ -572,10 +601,13 @@ entry:
ret void
}
; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math
; SI-NOT: v_mac_f32
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
; SI: v_mad_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
; GCN-LABEL: {{^}}mac_v2f16_neg_c_unsafe_fp_math:
; SI: v_cvt_f32_f16_e64 [[CVT_NEG0:v[0-9]+]], -{{v[0-9]+}}
; SI: v_cvt_f32_f16_e64 [[CVT_NEG1:v[0-9]+]], -{{v[0-9]+}}
; SI-DAG: v_mac_f32_e32 [[CVT_NEG0]], v{{[0-9]+}}, v{{[0-9]+}}
; SI-DAG: v_mac_f32_e32 [[CVT_NEG1]], v{{[0-9]+}}, v{{[0-9]+}}
; VI-NOT: v_mac_f16
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}
; VI: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, -v{{[-0-9]}}

View File

@ -24330,14 +24330,16 @@ v_cvt_f32_f16_e64 v0, exec_hi
v_cvt_f32_f16_e64 v0, 0
// CHECK: [0x00,0x00,0x4b,0xd1,0x80,0x00,0x00,0x00]
// FIXME: Parsing source modifiers
v_cvt_f32_f16_e64 v0, -1
// CHECK: [0x00,0x00,0x4b,0xd1,0xc1,0x00,0x00,0x00]
// CHECK: [0x00,0x00,0x4b,0xd1,0x81,0x00,0x00,0x20]
v_cvt_f32_f16_e64 v0, 0.5
// CHECK: [0x00,0x00,0x4b,0xd1,0xf0,0x00,0x00,0x00]
// FIXME: Parsing source modifiers
v_cvt_f32_f16_e64 v0, -4.0
// CHECK: [0x00,0x00,0x4b,0xd1,0xf7,0x00,0x00,0x00]
// CHECK: [0x00,0x00,0x4b,0xd1,0xf6,0x00,0x00,0x20]
v_cvt_f32_f16_e64 v0, v0
// CHECK: [0x00,0x00,0x4b,0xd1,0x00,0x01,0x00,0x00]