mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
AMDGPU: Add Vega12 and Vega20
Changes by Matt Arsenault and Konstantin Zhuravlyov. llvm-svn: 331215
This commit is contained in:
parent
2a5c2d1d3e
commit
23fa05864d
@ -200,6 +200,16 @@ names from both the *Processor* and *Alternative Processor* can be used.
|
||||
- Radeon Instinct MI25
|
||||
``gfx902`` ``amdgcn`` APU - xnack - Ryzen 3 2200G
|
||||
[on] - Ryzen 5 2400G
|
||||
``gfx904`` ``amdgcn`` dGPU - xnack *TBA*
|
||||
[off]
|
||||
.. TODO
|
||||
Add product
|
||||
names.
|
||||
``gfx906`` ``amdgcn`` dGPU - xnack *TBA*
|
||||
[off]
|
||||
.. TODO
|
||||
Add product
|
||||
names.
|
||||
=========== =============== ============ ===== ========= ======= ==================
|
||||
|
||||
.. _amdgpu-target-features:
|
||||
@ -547,8 +557,8 @@ The AMDGPU backend uses the following ELF header:
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810``
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900``
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902``
|
||||
*reserved* 0x02e Reserved.
|
||||
*reserved* 0x02f Reserved.
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904``
|
||||
``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906``
|
||||
*reserved* 0x030 Reserved.
|
||||
================================= ========== =============================
|
||||
|
||||
@ -765,7 +775,7 @@ The following relocation types are supported:
|
||||
``R_AMDGPU_ABS32_HI`` Static, 2 ``word32`` (S + A) >> 32
|
||||
Dynamic
|
||||
``R_AMDGPU_ABS64`` Static, 3 ``word64`` S + A
|
||||
Dynamic
|
||||
Dynamic
|
||||
``R_AMDGPU_REL32`` Static 4 ``word32`` S + A - P
|
||||
``R_AMDGPU_REL64`` Static 5 ``word64`` S + A - P
|
||||
``R_AMDGPU_ABS32`` Static, 6 ``word32`` S + A
|
||||
@ -784,7 +794,7 @@ the ``mesa3d`` OS, which does not support ``R_AMDGPU_ABS64``.
|
||||
|
||||
There is no current OS loader support for 32-bit programs and so
|
||||
``R_AMDGPU_ABS32`` is not used.
|
||||
|
||||
|
||||
.. _amdgpu-dwarf:
|
||||
|
||||
DWARF
|
||||
|
@ -687,7 +687,7 @@ enum : unsigned {
|
||||
|
||||
// AMDGCN-based processors.
|
||||
EF_AMDGPU_MACH_AMDGCN_FIRST = 0x020,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = 0x02d,
|
||||
EF_AMDGPU_MACH_AMDGCN_LAST = 0x02f,
|
||||
// AMDGCN GFX6.
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021,
|
||||
@ -705,12 +705,12 @@ enum : unsigned {
|
||||
// AMDGCN GFX9.
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
|
||||
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
|
||||
|
||||
// Reserved for AMDGCN-based processors.
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x02e,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED2 = 0x02f,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED3 = 0x030,
|
||||
EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x030,
|
||||
|
||||
// Indicates if the xnack target feature is enabled for all code contained in
|
||||
// the object.
|
||||
|
@ -1287,6 +1287,109 @@ def int_amdgcn_ds_bpermute :
|
||||
GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Deep learning intrinsics.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// f32 %r = llvm.amdgcn.fdot2(v2f16 %a, v2f16 %b, f32 %c)
|
||||
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
|
||||
def int_amdgcn_fdot2 :
|
||||
GCCBuiltin<"__builtin_amdgcn_fdot2">,
|
||||
Intrinsic<
|
||||
[llvm_float_ty], // %r
|
||||
[
|
||||
llvm_v2f16_ty, // %a
|
||||
llvm_v2f16_ty, // %b
|
||||
llvm_float_ty // %c
|
||||
],
|
||||
[IntrNoMem, IntrSpeculatable]
|
||||
>;
|
||||
|
||||
// i32 %r = llvm.amdgcn.sdot2(v2i16 %a, v2i16 %b, i32 %c)
|
||||
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
|
||||
def int_amdgcn_sdot2 :
|
||||
GCCBuiltin<"__builtin_amdgcn_sdot2">,
|
||||
Intrinsic<
|
||||
[llvm_i32_ty], // %r
|
||||
[
|
||||
llvm_v2i16_ty, // %a
|
||||
llvm_v2i16_ty, // %b
|
||||
llvm_i32_ty // %c
|
||||
],
|
||||
[IntrNoMem, IntrSpeculatable]
|
||||
>;
|
||||
|
||||
// u32 %r = llvm.amdgcn.udot2(v2u16 %a, v2u16 %b, u32 %c)
|
||||
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %c
|
||||
def int_amdgcn_udot2 :
|
||||
GCCBuiltin<"__builtin_amdgcn_udot2">,
|
||||
Intrinsic<
|
||||
[llvm_i32_ty], // %r
|
||||
[
|
||||
llvm_v2i16_ty, // %a
|
||||
llvm_v2i16_ty, // %b
|
||||
llvm_i32_ty // %c
|
||||
],
|
||||
[IntrNoMem, IntrSpeculatable]
|
||||
>;
|
||||
|
||||
// i32 %r = llvm.amdgcn.sdot4(v4i8 (as i32) %a, v4i8 (as i32) %b, i32 %c)
|
||||
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
|
||||
def int_amdgcn_sdot4 :
|
||||
GCCBuiltin<"__builtin_amdgcn_sdot4">,
|
||||
Intrinsic<
|
||||
[llvm_i32_ty], // %r
|
||||
[
|
||||
llvm_i32_ty, // %a
|
||||
llvm_i32_ty, // %b
|
||||
llvm_i32_ty // %c
|
||||
],
|
||||
[IntrNoMem, IntrSpeculatable]
|
||||
>;
|
||||
|
||||
// u32 %r = llvm.amdgcn.udot4(v4u8 (as u32) %a, v4u8 (as u32) %b, u32 %c)
|
||||
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] + %c
|
||||
def int_amdgcn_udot4 :
|
||||
GCCBuiltin<"__builtin_amdgcn_udot4">,
|
||||
Intrinsic<
|
||||
[llvm_i32_ty], // %r
|
||||
[
|
||||
llvm_i32_ty, // %a
|
||||
llvm_i32_ty, // %b
|
||||
llvm_i32_ty // %c
|
||||
],
|
||||
[IntrNoMem, IntrSpeculatable]
|
||||
>;
|
||||
|
||||
// i32 %r = llvm.amdgcn.sdot8(v8i4 (as i32) %a, v8i4 (as i32) %b, i32 %c)
|
||||
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
|
||||
// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
|
||||
def int_amdgcn_sdot8 :
|
||||
GCCBuiltin<"__builtin_amdgcn_sdot8">,
|
||||
Intrinsic<
|
||||
[llvm_i32_ty], // %r
|
||||
[
|
||||
llvm_i32_ty, // %a
|
||||
llvm_i32_ty, // %b
|
||||
llvm_i32_ty // %c
|
||||
],
|
||||
[IntrNoMem, IntrSpeculatable]
|
||||
>;
|
||||
|
||||
// u32 %r = llvm.amdgcn.udot8(v8u4 (as u32) %a, v8u4 (as u32) %b, u32 %c)
|
||||
// %r = %a[0] * %b[0] + %a[1] * %b[1] + %a[2] * %b[2] + %a[3] * %b[3] +
|
||||
// %a[4] * %b[4] + %a[5] * %b[5] + %a[6] * %b[6] + %a[7] * %b[7] + %c
|
||||
def int_amdgcn_udot8 :
|
||||
GCCBuiltin<"__builtin_amdgcn_udot8">,
|
||||
Intrinsic<
|
||||
[llvm_i32_ty], // %r
|
||||
[
|
||||
llvm_i32_ty, // %a
|
||||
llvm_i32_ty, // %b
|
||||
llvm_i32_ty // %c
|
||||
],
|
||||
[IntrNoMem, IntrSpeculatable]
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Special Intrinsics for backend internal use only. No frontend
|
||||
|
@ -400,6 +400,8 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX810, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX900, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX902, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX904, EF_AMDGPU_MACH);
|
||||
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
|
||||
BCase(EF_AMDGPU_XNACK);
|
||||
break;
|
||||
case ELF::EM_X86_64:
|
||||
|
@ -127,6 +127,12 @@ def FeatureMadMixInsts : SubtargetFeature<"mad-mix-insts",
|
||||
"Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions"
|
||||
>;
|
||||
|
||||
def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
|
||||
"HasFmaMixInsts",
|
||||
"true",
|
||||
"Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
|
||||
>;
|
||||
|
||||
// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
|
||||
// XNACK. The current default kernel driver setting is:
|
||||
// - graphics ring: XNACK disabled
|
||||
@ -310,6 +316,12 @@ def FeatureUnpackedD16VMem : SubtargetFeature<"unpacked-d16-vmem",
|
||||
"Has unpacked d16 vmem instructions"
|
||||
>;
|
||||
|
||||
def FeatureDLInsts : SubtargetFeature<"dl-insts",
|
||||
"HasDLInsts",
|
||||
"true",
|
||||
"Has deep learning instructions"
|
||||
>;
|
||||
|
||||
//===------------------------------------------------------------===//
|
||||
// Subtarget Features (options and debugging)
|
||||
//===------------------------------------------------------------===//
|
||||
@ -606,6 +618,18 @@ def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2,
|
||||
FeatureXNACK
|
||||
]>;
|
||||
|
||||
def FeatureISAVersion9_0_4 : SubtargetFeatureISAVersion <9,0,4,
|
||||
[FeatureGFX9,
|
||||
FeatureLDSBankCount32,
|
||||
FeatureFmaMixInsts]>;
|
||||
|
||||
def FeatureISAVersion9_0_6 : SubtargetFeatureISAVersion <9,0,6,
|
||||
[FeatureGFX9,
|
||||
HalfRate64Ops,
|
||||
FeatureFmaMixInsts,
|
||||
FeatureLDSBankCount32,
|
||||
FeatureDLInsts]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Debugger related subtarget features.
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -788,6 +812,13 @@ def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
|
||||
def HasMovrel : Predicate<"Subtarget->hasMovrel()">,
|
||||
AssemblerPredicate<"FeatureMovrel">;
|
||||
|
||||
def HasFmaMixInsts : Predicate<"Subtarget->hasFmaMixInsts()">,
|
||||
AssemblerPredicate<"FeatureFmaMixInsts">;
|
||||
|
||||
def HasDLInsts : Predicate<"Subtarget->hasDLInsts()">,
|
||||
AssemblerPredicate<"FeatureDLInsts">;
|
||||
|
||||
|
||||
def EnableLateCFGStructurize : Predicate<
|
||||
"EnableLateStructurizeCFG">;
|
||||
|
||||
|
@ -215,7 +215,7 @@ private:
|
||||
void SelectS_BFE(SDNode *N);
|
||||
bool isCBranchSCC(const SDNode *N) const;
|
||||
void SelectBRCOND(SDNode *N);
|
||||
void SelectFMAD(SDNode *N);
|
||||
void SelectFMAD_FMA(SDNode *N);
|
||||
void SelectATOMIC_CMP_SWAP(SDNode *N);
|
||||
|
||||
protected:
|
||||
@ -621,7 +621,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
SelectBRCOND(N);
|
||||
return;
|
||||
case ISD::FMAD:
|
||||
SelectFMAD(N);
|
||||
case ISD::FMA:
|
||||
SelectFMAD_FMA(N);
|
||||
return;
|
||||
case AMDGPUISD::ATOMIC_CMP_SWAP:
|
||||
SelectATOMIC_CMP_SWAP(N);
|
||||
@ -1728,9 +1729,13 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
|
||||
VCC.getValue(0));
|
||||
}
|
||||
|
||||
void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
|
||||
void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
|
||||
MVT VT = N->getSimpleValueType(0);
|
||||
if (VT != MVT::f32 || !Subtarget->hasMadMixInsts()) {
|
||||
bool IsFMA = N->getOpcode() == ISD::FMA;
|
||||
if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
|
||||
!Subtarget->hasFmaMixInsts()) ||
|
||||
((IsFMA && Subtarget->hasMadMixInsts()) ||
|
||||
(!IsFMA && Subtarget->hasFmaMixInsts()))) {
|
||||
SelectCode(N);
|
||||
return;
|
||||
}
|
||||
@ -1740,13 +1745,13 @@ void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
|
||||
SDValue Src2 = N->getOperand(2);
|
||||
unsigned Src0Mods, Src1Mods, Src2Mods;
|
||||
|
||||
// Avoid using v_mad_mix_f32 unless there is actually an operand using the
|
||||
// conversion from f16.
|
||||
// Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
|
||||
// using the conversion from f16.
|
||||
bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
|
||||
bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
|
||||
bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
|
||||
|
||||
assert(!Subtarget->hasFP32Denormals() &&
|
||||
assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
|
||||
"fmad selected with denormals enabled");
|
||||
// TODO: We can select this with f32 denormals enabled if all the sources are
|
||||
// converted from f16 (in which case fmad isn't legal).
|
||||
@ -1762,7 +1767,9 @@ void AMDGPUDAGToDAGISel::SelectFMAD(SDNode *N) {
|
||||
Zero, Zero
|
||||
};
|
||||
|
||||
CurDAG->SelectNodeTo(N, AMDGPU::V_MAD_MIX_F32, MVT::f32, Ops);
|
||||
CurDAG->SelectNodeTo(N,
|
||||
IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
|
||||
MVT::f32, Ops);
|
||||
} else {
|
||||
SelectCode(N);
|
||||
}
|
||||
|
@ -939,7 +939,8 @@ bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
|
||||
// where this is OK to use.
|
||||
bool AMDGPUTargetLowering::isFPExtFoldable(unsigned Opcode,
|
||||
EVT DestVT, EVT SrcVT) const {
|
||||
return Opcode == ISD::FMAD && Subtarget->hasMadMixInsts() &&
|
||||
return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) ||
|
||||
(Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
|
||||
DestVT.getScalarType() == MVT::f32 && !Subtarget->hasFP32Denormals() &&
|
||||
SrcVT.getScalarType() == MVT::f16;
|
||||
}
|
||||
|
@ -148,6 +148,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
HasIntClamp(false),
|
||||
HasVOP3PInsts(false),
|
||||
HasMadMixInsts(false),
|
||||
HasFmaMixInsts(false),
|
||||
HasMovrel(false),
|
||||
HasVGPRIndexMode(false),
|
||||
HasScalarStores(false),
|
||||
@ -160,6 +161,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
HasSDWAMac(false),
|
||||
HasSDWAOutModsVOPC(false),
|
||||
HasDPP(false),
|
||||
HasDLInsts(false),
|
||||
FlatAddressSpace(false),
|
||||
FlatInstOffsets(false),
|
||||
FlatGlobalInsts(false),
|
||||
|
@ -72,7 +72,10 @@ public:
|
||||
ISAVersion8_0_3,
|
||||
ISAVersion8_1_0,
|
||||
ISAVersion9_0_0,
|
||||
ISAVersion9_0_2
|
||||
ISAVersion9_0_1,
|
||||
ISAVersion9_0_2,
|
||||
ISAVersion9_0_4,
|
||||
ISAVersion9_0_6
|
||||
};
|
||||
|
||||
enum TrapHandlerAbi {
|
||||
@ -150,6 +153,7 @@ protected:
|
||||
bool HasIntClamp;
|
||||
bool HasVOP3PInsts;
|
||||
bool HasMadMixInsts;
|
||||
bool HasFmaMixInsts;
|
||||
bool HasMovrel;
|
||||
bool HasVGPRIndexMode;
|
||||
bool HasScalarStores;
|
||||
@ -162,6 +166,7 @@ protected:
|
||||
bool HasSDWAMac;
|
||||
bool HasSDWAOutModsVOPC;
|
||||
bool HasDPP;
|
||||
bool HasDLInsts;
|
||||
bool FlatAddressSpace;
|
||||
bool FlatInstOffsets;
|
||||
bool FlatGlobalInsts;
|
||||
@ -329,6 +334,10 @@ public:
|
||||
return HasMadMixInsts;
|
||||
}
|
||||
|
||||
bool hasFmaMixInsts() const {
|
||||
return HasFmaMixInsts;
|
||||
}
|
||||
|
||||
bool hasCARRY() const {
|
||||
return (getGeneration() >= EVERGREEN);
|
||||
}
|
||||
@ -534,6 +543,10 @@ public:
|
||||
return getGeneration() < SEA_ISLANDS;
|
||||
}
|
||||
|
||||
bool hasDLInsts() const {
|
||||
return HasDLInsts;
|
||||
}
|
||||
|
||||
/// \brief Returns the offset in bytes from the start of the input buffer
|
||||
/// of the first explicit kernel argument.
|
||||
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
|
||||
|
@ -4601,12 +4601,14 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
|
||||
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
|
||||
}
|
||||
|
||||
// special case v_mac_{f16, f32}:
|
||||
// Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
|
||||
// it has src2 register operand that is tied to dst operand
|
||||
// we don't allow modifiers for this operand in assembler so src2_modifiers
|
||||
// should be 0
|
||||
if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi ||
|
||||
Opc == AMDGPU::V_MAC_F16_e64_vi) {
|
||||
// should be 0.
|
||||
if (Opc == AMDGPU::V_MAC_F32_e64_si ||
|
||||
Opc == AMDGPU::V_MAC_F32_e64_vi ||
|
||||
Opc == AMDGPU::V_MAC_F16_e64_vi ||
|
||||
Opc == AMDGPU::V_FMAC_F32_e64_vi) {
|
||||
auto it = Inst.begin();
|
||||
std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
|
||||
it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
|
||||
|
@ -201,7 +201,17 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
|
||||
|
||||
if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
|
||||
Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
|
||||
if (Res) break;
|
||||
if (Res)
|
||||
break;
|
||||
}
|
||||
|
||||
// Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
|
||||
// v_mad_mixhi_f16 for FMA variants. Try to decode using this special
|
||||
// table first so we print the correct name.
|
||||
if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
|
||||
Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
|
||||
if (Res)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -148,3 +148,11 @@ def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
|
||||
def : ProcessorModel<"gfx902", SIQuarterSpeedModel,
|
||||
[FeatureISAVersion9_0_2]
|
||||
>;
|
||||
|
||||
def : ProcessorModel<"gfx904", SIQuarterSpeedModel,
|
||||
[FeatureISAVersion9_0_4]
|
||||
>;
|
||||
|
||||
def : ProcessorModel<"gfx906", SIQuarterSpeedModel,
|
||||
[FeatureISAVersion9_0_6]
|
||||
>;
|
||||
|
@ -95,6 +95,8 @@ unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
|
||||
// AMDGCN GFX9.
|
||||
.Case("gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900)
|
||||
.Case("gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902)
|
||||
.Case("gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904)
|
||||
.Case("gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906)
|
||||
// Not specified processor.
|
||||
.Default(ELF::EF_AMDGPU_MACH_NONE);
|
||||
}
|
||||
|
@ -127,14 +127,18 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII,
|
||||
unsigned Opc = UseMI.getOpcode();
|
||||
switch (Opc) {
|
||||
case AMDGPU::V_MAC_F32_e64:
|
||||
case AMDGPU::V_MAC_F16_e64: {
|
||||
case AMDGPU::V_MAC_F16_e64:
|
||||
case AMDGPU::V_FMAC_F32_e64: {
|
||||
// Special case for mac/fmac. Since this is replaced with mad/fma when folded into
|
||||
// src2, we need to check the legality for the final instruction.
|
||||
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
|
||||
if (static_cast<int>(OpNo) == Src2Idx) {
|
||||
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
|
||||
bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
|
||||
const MCInstrDesc &MadDesc
|
||||
= TII->get(IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
|
||||
|
||||
unsigned Opc = IsFMA ?
|
||||
AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
|
||||
const MCInstrDesc &MadDesc = TII->get(Opc);
|
||||
return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType);
|
||||
}
|
||||
return false;
|
||||
@ -224,13 +228,17 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
|
||||
|
||||
// Special case for v_mac_{f16, f32}_e64 and v_fmac_f32_e64 if we are trying to fold into src2
|
||||
unsigned Opc = MI->getOpcode();
|
||||
if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64) &&
|
||||
if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
|
||||
Opc == AMDGPU::V_FMAC_F32_e64) &&
|
||||
(int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
|
||||
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e64;
|
||||
bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
|
||||
unsigned NewOpc = IsFMA ?
|
||||
AMDGPU::V_FMA_F32 : (IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16);
|
||||
|
||||
// Check if changing this to a v_mad_{f16, f32} or v_fma_f32 instruction will allow us
|
||||
// to fold the operand.
|
||||
MI->setDesc(TII->get(IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16));
|
||||
MI->setDesc(TII->get(NewOpc));
|
||||
bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
|
||||
if (FoldAsMAD) {
|
||||
MI->untieRegOperand(OpNo);
|
||||
|
@ -3272,12 +3272,17 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
VT = VT.getScalarType();
|
||||
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
case MVT::f32:
|
||||
case MVT::f32: {
|
||||
// This is as fast on some subtargets. However, we always have full rate f32
|
||||
// mad available which returns the same result as the separate operations
|
||||
// which we should prefer over fma. We can't use this if we want to support
|
||||
// denormals, so only report this in these cases.
|
||||
return Subtarget->hasFP32Denormals() && Subtarget->hasFastFMAF32();
|
||||
if (Subtarget->hasFP32Denormals())
|
||||
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
|
||||
|
||||
// If the subtarget has v_fmac_f32, that's just as good as v_mac_f32.
|
||||
return Subtarget->hasFastFMAF32() && Subtarget->hasDLInsts();
|
||||
}
|
||||
case MVT::f64:
|
||||
return true;
|
||||
case MVT::f16:
|
||||
|
@ -2161,20 +2161,24 @@ static int64_t getFoldableImm(const MachineOperand* MO) {
|
||||
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
|
||||
MachineInstr &MI,
|
||||
LiveVariables *LV) const {
|
||||
unsigned Opc = MI.getOpcode();
|
||||
bool IsF16 = false;
|
||||
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64;
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
switch (Opc) {
|
||||
default:
|
||||
return nullptr;
|
||||
case AMDGPU::V_MAC_F16_e64:
|
||||
IsF16 = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AMDGPU::V_MAC_F32_e64:
|
||||
case AMDGPU::V_FMAC_F32_e64:
|
||||
break;
|
||||
case AMDGPU::V_MAC_F16_e32:
|
||||
IsF16 = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AMDGPU::V_MAC_F32_e32: {
|
||||
case AMDGPU::V_MAC_F32_e32:
|
||||
case AMDGPU::V_FMAC_F32_e32: {
|
||||
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
|
||||
AMDGPU::OpName::src0);
|
||||
const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
|
||||
@ -2199,7 +2203,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
|
||||
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
|
||||
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
|
||||
|
||||
if (!Src0Mods && !Src1Mods && !Clamp && !Omod &&
|
||||
if (!IsFMA && !Src0Mods && !Src1Mods && !Clamp && !Omod &&
|
||||
// If we have an SGPR input, we will violate the constant bus restriction.
|
||||
(!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
|
||||
if (auto Imm = getFoldableImm(Src2)) {
|
||||
@ -2230,8 +2234,10 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
|
||||
}
|
||||
}
|
||||
|
||||
return BuildMI(*MBB, MI, MI.getDebugLoc(),
|
||||
get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
|
||||
assert((!IsFMA || !IsF16) && "fmac only expected with f32");
|
||||
unsigned NewOpc = IsFMA ? AMDGPU::V_FMA_F32 :
|
||||
(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32);
|
||||
return BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
|
||||
.add(*Dst)
|
||||
.addImm(Src0Mods ? Src0Mods->getImm() : 0)
|
||||
.add(*Src0)
|
||||
@ -4048,17 +4054,23 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
|
||||
legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL);
|
||||
legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
|
||||
|
||||
unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor)
|
||||
.add(Src0)
|
||||
.add(Src1);
|
||||
unsigned NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
if (ST.hasDLInsts()) {
|
||||
BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest)
|
||||
.add(Src0)
|
||||
.add(Src1);
|
||||
} else {
|
||||
unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor)
|
||||
.add(Src0)
|
||||
.add(Src1);
|
||||
|
||||
unsigned Not = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), Not)
|
||||
.addReg(Xor);
|
||||
BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), NewDest)
|
||||
.addReg(Xor);
|
||||
}
|
||||
|
||||
MRI.replaceRegWith(Dest.getReg(), Not);
|
||||
addUsersToMoveToVALUWorklist(Not, MRI, Worklist);
|
||||
MRI.replaceRegWith(Dest.getReg(), NewDest);
|
||||
addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
|
||||
}
|
||||
|
||||
void SIInstrInfo::splitScalar64BitUnaryOp(
|
||||
|
@ -1934,6 +1934,9 @@ def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
|
||||
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
|
||||
def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
|
||||
|
||||
def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
|
||||
def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;
|
||||
|
||||
class Commutable_REV <string revOp, bit isOrig> {
|
||||
string RevOp = revOp;
|
||||
bit IsOrig = isOrig;
|
||||
|
@ -1367,6 +1367,16 @@ def : GCNPat<
|
||||
>;
|
||||
}
|
||||
|
||||
let OtherPredicates = [HasDLInsts] in {
|
||||
def : GCNPat <
|
||||
(fma (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
|
||||
(f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)),
|
||||
(f32 (VOP3NoMods f32:$src2))),
|
||||
(V_FMAC_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
|
||||
SRCMODS.NONE, $src2, $clamp, $omod)
|
||||
>;
|
||||
} // End OtherPredicates = [HasDLInsts]
|
||||
|
||||
|
||||
// Allow integer inputs
|
||||
class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPat<
|
||||
|
@ -100,6 +100,7 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
|
||||
|
||||
case AMDGPU::V_MAC_F32_e64:
|
||||
case AMDGPU::V_MAC_F16_e64:
|
||||
case AMDGPU::V_FMAC_F32_e64:
|
||||
if (!isVGPR(Src2, TRI, MRI) ||
|
||||
TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
|
||||
return false;
|
||||
|
@ -221,6 +221,10 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
|
||||
return {9, 0, 0};
|
||||
if (Features.test(FeatureISAVersion9_0_2))
|
||||
return {9, 0, 2};
|
||||
if (Features.test(FeatureISAVersion9_0_4))
|
||||
return {9, 0, 4};
|
||||
if (Features.test(FeatureISAVersion9_0_6))
|
||||
return {9, 0, 6};
|
||||
if (Features.test(FeatureGFX9))
|
||||
return {9, 0, 0};
|
||||
|
||||
|
@ -491,6 +491,19 @@ defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
|
||||
|
||||
} // End SubtargetPredicate = Has16BitInsts
|
||||
|
||||
let SubtargetPredicate = HasDLInsts in {
|
||||
|
||||
defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32>;
|
||||
|
||||
let Constraints = "$vdst = $src2",
|
||||
DisableEncoding="$src2",
|
||||
isConvertibleToThreeAddress = 1,
|
||||
isCommutable = 1 in {
|
||||
defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
|
||||
}
|
||||
|
||||
} // End SubtargetPredicate = HasDLInsts
|
||||
|
||||
// Note: 16-bit instructions produce a 0 result in the high 16-bits.
|
||||
multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst> {
|
||||
|
||||
@ -944,3 +957,10 @@ def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
|
||||
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
|
||||
|
||||
} // End SubtargetPredicate = isVI
|
||||
|
||||
let SubtargetPredicate = HasDLInsts in {
|
||||
|
||||
defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
|
||||
defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;
|
||||
|
||||
} // End SubtargetPredicate = HasDLInsts
|
||||
|
@ -68,6 +68,67 @@ def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I1
|
||||
def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
|
||||
def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
|
||||
|
||||
multiclass MadFmaMixPats<SDPatternOperator fma_like,
|
||||
Instruction mix_inst,
|
||||
Instruction mixlo_inst,
|
||||
Instruction mixhi_inst> {
|
||||
def : GCNPat <
|
||||
(f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
|
||||
(mixlo_inst $src0_modifiers, $src0,
|
||||
$src1_modifiers, $src1,
|
||||
$src2_modifiers, $src2,
|
||||
DSTCLAMP.NONE,
|
||||
(i32 (IMPLICIT_DEF)))
|
||||
>;
|
||||
|
||||
// FIXME: Special case handling for mixhi (especially for clamp)
|
||||
// because dealing with the write to high half of the register is
|
||||
// difficult.
|
||||
def : GCNPat <
|
||||
(build_vector f16:$elt0, (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
|
||||
(v2f16 (mixhi_inst $src0_modifiers, $src0,
|
||||
$src1_modifiers, $src1,
|
||||
$src2_modifiers, $src2,
|
||||
DSTCLAMP.NONE,
|
||||
$elt0))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(build_vector
|
||||
f16:$elt0,
|
||||
(AMDGPUclamp (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
|
||||
(v2f16 (mixhi_inst $src0_modifiers, $src0,
|
||||
$src1_modifiers, $src1,
|
||||
$src2_modifiers, $src2,
|
||||
DSTCLAMP.ENABLE,
|
||||
$elt0))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(AMDGPUclamp (build_vector
|
||||
(fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers)))),
|
||||
(fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$hi_src1, i32:$hi_src1_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers)))))),
|
||||
(v2f16 (mixhi_inst $hi_src0_modifiers, $hi_src0,
|
||||
$hi_src1_modifiers, $hi_src1,
|
||||
$hi_src2_modifiers, $hi_src2,
|
||||
DSTCLAMP.ENABLE,
|
||||
(mixlo_inst $lo_src0_modifiers, $lo_src0,
|
||||
$lo_src1_modifiers, $lo_src1,
|
||||
$lo_src2_modifiers, $lo_src2,
|
||||
DSTCLAMP.ENABLE,
|
||||
(i32 (IMPLICIT_DEF)))))
|
||||
>;
|
||||
}
|
||||
|
||||
let SubtargetPredicate = HasMadMixInsts in {
|
||||
// These are VOP3a-like opcodes which accept no omod.
|
||||
@ -84,64 +145,37 @@ def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16
|
||||
}
|
||||
}
|
||||
|
||||
def : GCNPat <
|
||||
(f16 (fpround (fmad (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
|
||||
(V_MAD_MIXLO_F16 $src0_modifiers, $src0,
|
||||
$src1_modifiers, $src1,
|
||||
$src2_modifiers, $src2,
|
||||
DSTCLAMP.NONE,
|
||||
(i32 (IMPLICIT_DEF)))
|
||||
>;
|
||||
defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
|
||||
} // End SubtargetPredicate = HasMadMixInsts
|
||||
|
||||
// FIXME: Special case handling for maxhi (especially for clamp)
|
||||
// because dealing with the write to high half of the register is
|
||||
// difficult.
|
||||
def : GCNPat <
|
||||
(build_vector f16:$elt0, (fpround (fmad (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
|
||||
(v2f16 (V_MAD_MIXHI_F16 $src0_modifiers, $src0,
|
||||
$src1_modifiers, $src1,
|
||||
$src2_modifiers, $src2,
|
||||
DSTCLAMP.NONE,
|
||||
$elt0))
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(build_vector
|
||||
f16:$elt0,
|
||||
(AMDGPUclamp (fpround (fmad (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
|
||||
(v2f16 (V_MAD_MIXHI_F16 $src0_modifiers, $src0,
|
||||
$src1_modifiers, $src1,
|
||||
$src2_modifiers, $src2,
|
||||
DSTCLAMP.ENABLE,
|
||||
$elt0))
|
||||
>;
|
||||
// Essentially the same as the mad_mix versions
|
||||
let SubtargetPredicate = HasFmaMixInsts in {
|
||||
let isCommutable = 1 in {
|
||||
def V_FMA_MIX_F32 : VOP3_VOP3PInst<"v_fma_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;
|
||||
|
||||
def : GCNPat <
|
||||
(AMDGPUclamp (build_vector
|
||||
(fpround (fmad (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers)))),
|
||||
(fpround (fmad (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$hi_src1, i32:$hi_src1_modifiers)),
|
||||
(f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers)))))),
|
||||
(v2f16 (V_MAD_MIXHI_F16 $hi_src0_modifiers, $hi_src0,
|
||||
$hi_src1_modifiers, $hi_src1,
|
||||
$hi_src2_modifiers, $hi_src2,
|
||||
DSTCLAMP.ENABLE,
|
||||
(V_MAD_MIXLO_F16 $lo_src0_modifiers, $lo_src0,
|
||||
$lo_src1_modifiers, $lo_src1,
|
||||
$lo_src2_modifiers, $lo_src2,
|
||||
DSTCLAMP.ENABLE,
|
||||
(i32 (IMPLICIT_DEF)))))
|
||||
>;
|
||||
// Clamp modifier is applied after conversion to f16.
|
||||
def V_FMA_MIXLO_F16 : VOP3_VOP3PInst<"v_fma_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
|
||||
|
||||
} // End SubtargetPredicate = [HasMadMixInsts]
|
||||
let ClampLo = 0, ClampHi = 1 in {
|
||||
def V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, 1>;
|
||||
}
|
||||
}
|
||||
|
||||
defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
|
||||
}
|
||||
|
||||
let SubtargetPredicate = HasDLInsts in {
|
||||
|
||||
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", VOP3_Profile<VOP_F32_V2F16_V2F16_F32>, int_amdgcn_fdot2>;
|
||||
def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2>;
|
||||
def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16", VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2>;
|
||||
def V_DOT4_I32_I8 : VOP3Inst<"v_dot4_i32_i8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_sdot4>;
|
||||
def V_DOT4_U32_U8 : VOP3Inst<"v_dot4_u32_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_udot4>;
|
||||
def V_DOT8_I32_I4 : VOP3Inst<"v_dot8_i32_i4", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_sdot8>;
|
||||
def V_DOT8_U32_U4 : VOP3Inst<"v_dot8_u32_u4", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_udot8>;
|
||||
|
||||
} // End SubtargetPredicate = HasDLInsts
|
||||
|
||||
multiclass VOP3P_Real_vi<bits<10> op> {
|
||||
def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
|
||||
@ -172,6 +206,33 @@ defm V_PK_MUL_F16 : VOP3P_Real_vi <0x390>;
|
||||
defm V_PK_MIN_F16 : VOP3P_Real_vi <0x391>;
|
||||
defm V_PK_MAX_F16 : VOP3P_Real_vi <0x392>;
|
||||
|
||||
|
||||
let SubtargetPredicate = HasMadMixInsts in {
|
||||
defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x3a0>;
|
||||
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
|
||||
defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
|
||||
}
|
||||
|
||||
let SubtargetPredicate = HasFmaMixInsts in {
|
||||
let DecoderNamespace = "GFX9_DL" in {
|
||||
// The mad_mix instructions were renamed and their behaviors changed,
|
||||
// but the opcode stayed the same so we need to put these in a
|
||||
// different DecoderNamespace to avoid the ambiguity.
|
||||
defm V_FMA_MIX_F32 : VOP3P_Real_vi <0x3a0>;
|
||||
defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
|
||||
defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
let SubtargetPredicate = HasDLInsts in {
|
||||
|
||||
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>;
|
||||
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>;
|
||||
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x3a7>;
|
||||
defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>;
|
||||
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>;
|
||||
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>;
|
||||
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>;
|
||||
|
||||
} // End SubtargetPredicate = HasDLInsts
|
||||
|
@ -44,6 +44,8 @@
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=stoney < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX810 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx900 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX900 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx902 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX902 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx904 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX904 %s
|
||||
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX906 %s
|
||||
|
||||
; ARCH-R600: Arch: r600
|
||||
; ARCH-GCN: Arch: amdgcn
|
||||
@ -81,6 +83,8 @@
|
||||
; GFX900: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
|
||||
; GFX902: EF_AMDGPU_MACH_AMDGCN_GFX902 (0x2D)
|
||||
; GFX902-NEXT: EF_AMDGPU_XNACK (0x100)
|
||||
; GFX904: EF_AMDGPU_MACH_AMDGCN_GFX904 (0x2E)
|
||||
; GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
|
||||
; ALL: ]
|
||||
|
||||
define amdgpu_kernel void @elf_header() {
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX906 -check-prefix=FUNC %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cedar -verify-machineinstrs < %s
|
||||
; RUN: not llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=juniper -verify-machineinstrs < %s
|
||||
@ -16,6 +17,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
|
||||
; FUNC-LABEL: {{^}}fma_f32:
|
||||
; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; GFX906: v_fmac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}},
|
||||
; EG: FMA {{\*? *}}[[RES]]
|
||||
@ -29,10 +31,20 @@ define amdgpu_kernel void @fma_f32(float addrspace(1)* %out, float addrspace(1)*
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fmac_to_3addr_f32:
|
||||
; GCN: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
define float @fmac_to_3addr_f32(float %r0, float %r1, float %r2) {
|
||||
%r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2)
|
||||
ret float %r3
|
||||
}
|
||||
|
||||
; FUNC-LABEL: {{^}}fma_v2f32:
|
||||
; SI: v_fma_f32
|
||||
; SI: v_fma_f32
|
||||
|
||||
; GFX906: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; GFX906: v_fmac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].[[CHLO:[XYZW]]][[CHHI:[XYZW]]], {{T[0-9]\.[XYZW]}},
|
||||
; EG-DAG: FMA {{\*? *}}[[RES]].[[CHLO]]
|
||||
; EG-DAG: FMA {{\*? *}}[[RES]].[[CHHI]]
|
||||
@ -51,6 +63,10 @@ define amdgpu_kernel void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float>
|
||||
; SI: v_fma_f32
|
||||
; SI: v_fma_f32
|
||||
; SI: v_fma_f32
|
||||
; GFX906: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; GFX906: v_fmac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+$}}
|
||||
; GFX906: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; GFX906: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].{{[XYZW][XYZW][XYZW][XYZW]}}, {{T[0-9]\.[XYZW]}},
|
||||
; EG-DAG: FMA {{\*? *}}[[RES]].X
|
||||
@ -97,3 +113,34 @@ define amdgpu_kernel void @fma_commute_mul_s_f32(float addrspace(1)* noalias %ou
|
||||
store float %fma, float addrspace(1)* %out.gep, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Without special casing the inline constant check for v_fmac_f32's
|
||||
; src2, this fails to fold the 1.0 into an fma.
|
||||
|
||||
; FUNC-LABEL: {{^}}fold_inline_imm_into_fmac_src2_f32:
|
||||
; GFX906: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
|
||||
; GFX906: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
|
||||
|
||||
; GFX906: v_add_f32_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
|
||||
; GFX906: v_fma_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
|
||||
define amdgpu_kernel void @fold_inline_imm_into_fmac_src2_f32(float addrspace(1)* %out, float addrspace(1)* %a, float addrspace(1)* %b) nounwind {
|
||||
bb:
|
||||
%tid = call i32 @llvm.r600.read.tidig.x()
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
%gep.a = getelementptr inbounds float, float addrspace(1)* %a, i64 %tid.ext
|
||||
%gep.b = getelementptr inbounds float, float addrspace(1)* %b, i64 %tid.ext
|
||||
%gep.out = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
|
||||
%tmp = load volatile float, float addrspace(1)* %gep.a
|
||||
%tmp1 = load volatile float, float addrspace(1)* %gep.b
|
||||
%tmp2 = fadd contract float %tmp, %tmp
|
||||
%tmp3 = fmul contract float %tmp2, 4.0
|
||||
%tmp4 = fsub contract float 1.0, %tmp3
|
||||
%tmp5 = fadd contract float %tmp4, %tmp1
|
||||
%tmp6 = fadd contract float %tmp1, %tmp1
|
||||
%tmp7 = fmul contract float %tmp6, %tmp
|
||||
%tmp8 = fsub contract float 1.0, %tmp7
|
||||
%tmp9 = fmul contract float %tmp8, 8.0
|
||||
%tmp10 = fadd contract float %tmp5, %tmp9
|
||||
store float %tmp10, float addrspace(1)* %gep.out
|
||||
ret void
|
||||
}
|
||||
|
@ -1,12 +1,22 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-STRICT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-STRICT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-STRICT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-STRICT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-STRICT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-STRICT,SI %s
|
||||
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-CONTRACT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-CONTRACT,GCN-FLUSH-MAD,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-CONTRACT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s
|
||||
|
||||
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -mattr=-fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-MAD,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX900 %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx900 -mattr=+fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX900 %s
|
||||
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -mattr=-fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-FLUSH,GCN-FLUSH-STRICT,GCN-FLUSH-FMAC,GFX9-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-STRICT,GFX906 %s
|
||||
|
||||
; FIXME: Should probably test gfx906 with +fp32-denormals as well, but sometimes selecting fmac is painful to match, so the run line below is disabled (XUN).
|
||||
; XUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=gfx906 -mattr=+fp32-denormals -fp-contract=on < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-DENORM-STRICT,GCN-DENORM,GFX9-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-STRICT,GFX906 %s
|
||||
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=-fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-CONTRACT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-FASTFMA,GCN-FLUSH-FASTFMA-CONTRACT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=tahiti -mattr=+fp32-denormals,+fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-FASTFMA,GCN-DENORM-FASTFMA-CONTRACT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=-fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-FLUSH-CONTRACT,GCN-FLUSH,SI-FLUSH,GCN-FLUSH-SLOWFMA,GCN-FLUSH-SLOWFMA-CONTRACT,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -verify-machineinstrs -mcpu=verde -mattr=+fp32-denormals,-fast-fmaf -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GCN-DENORM-CONTRACT,GCN-DENORM,SI-DENORM,GCN-DENORM-SLOWFMA,GCN-DENORM-SLOWFMA-CONTRACT,SI %s
|
||||
|
||||
; Test all permutations of: fp32 denormals, fast fp contract, fp contract enabled for fmuladd, fmaf fast/slow.
|
||||
|
||||
@ -19,7 +29,8 @@ declare half @llvm.fmuladd.f16(half, half, half) #1
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
|
||||
; GCN-LABEL: {{^}}fmuladd_f32:
|
||||
; GCN-FLUSH: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; GCN-FLUSH-MAD: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
; GCN-FLUSH-FMAC: v_fmac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
; GCN-DENORM-FASTFMA: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
|
||||
|
||||
@ -57,10 +68,11 @@ define amdgpu_kernel void @fmul_fadd_f32(float addrspace(1)* %out, float addrspa
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fmuladd_2.0_a_b_f32
|
||||
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
|
||||
|
||||
; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
; GCN-FLUSH-MAD: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
; GCN-FLUSH-FMAC: v_fmac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
; SI-FLUSH: buffer_store_dword [[R2]]
|
||||
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
|
||||
@ -86,12 +98,14 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float a
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fmuladd_a_2.0_b_f32
|
||||
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
|
||||
|
||||
; GCN-FLUSH-MAD: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
; GCN-FLUSH-FMAC: v_fmac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
|
||||
; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
; SI-FLUSH: buffer_store_dword [[R2]]
|
||||
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
|
||||
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
|
||||
|
||||
@ -99,7 +113,7 @@ define amdgpu_kernel void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float a
|
||||
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
|
||||
|
||||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
@ -115,12 +129,13 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float a
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fadd_a_a_b_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
|
||||
|
||||
; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
|
||||
; SI-FLUSH: buffer_store_dword [[R2]]
|
||||
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
|
||||
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
|
||||
|
||||
@ -131,7 +146,7 @@ define amdgpu_kernel void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float a
|
||||
; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
|
||||
|
||||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @fadd_a_a_b_f32(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) #0 {
|
||||
@ -150,12 +165,13 @@ define amdgpu_kernel void @fadd_a_a_b_f32(float addrspace(1)* %out,
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fadd_b_a_a_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
|
||||
|
||||
; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
|
||||
; SI-FLUSH: buffer_store_dword [[R2]]
|
||||
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
|
||||
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
|
||||
|
||||
@ -166,7 +182,7 @@ define amdgpu_kernel void @fadd_a_a_b_f32(float addrspace(1)* %out,
|
||||
; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
|
||||
|
||||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @fadd_b_a_a_f32(float addrspace(1)* %out,
|
||||
float addrspace(1)* %in1,
|
||||
float addrspace(1)* %in2) #0 {
|
||||
@ -185,9 +201,10 @@ define amdgpu_kernel void @fadd_b_a_a_f32(float addrspace(1)* %out,
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
|
||||
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN-FLUSH: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN-FLUSH-MAD: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
|
||||
; GCN-FLUSH-FMAC: v_fmac_f32_e32 [[R2]], -2.0, [[R1]]
|
||||
|
||||
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
|
||||
|
||||
@ -195,7 +212,7 @@ define amdgpu_kernel void @fadd_b_a_a_f32(float addrspace(1)* %out,
|
||||
; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
|
||||
|
||||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
@ -210,13 +227,16 @@ define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, flo
|
||||
ret void
|
||||
}
|
||||
|
||||
; XXX
|
||||
; GCN-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
|
||||
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
|
||||
|
||||
; GCN-FLUSH-MAD: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
; GCN-FLUSH-FMAC: v_fmac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
|
||||
; GCN-FLUSH: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
|
||||
; SI-FLUSH: buffer_store_dword [[R2]]
|
||||
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
|
||||
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]]
|
||||
|
||||
@ -224,7 +244,7 @@ define amdgpu_kernel void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, flo
|
||||
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
|
||||
|
||||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
@ -242,12 +262,14 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out,
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
|
||||
|
||||
; GCN-FLUSH-MAD: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
|
||||
; GCN-FLUSH-FMAC: v_fmac_f32_e32 [[R2]], -2.0, [[R1]]
|
||||
|
||||
; GCN-FLUSH: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
|
||||
; SI-FLUSH: buffer_store_dword [[R2]]
|
||||
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
|
||||
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
|
||||
|
||||
@ -255,7 +277,7 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out,
|
||||
; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
|
||||
|
||||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
@ -273,11 +295,13 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, flo
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN-FLUSH-MAD: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
|
||||
; GCN-FLUSH-FMAC: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
|
||||
|
||||
; SI-FLUSH: buffer_store_dword [[RESULT]]
|
||||
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
|
||||
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
|
||||
|
||||
@ -285,7 +309,7 @@ define amdgpu_kernel void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, flo
|
||||
; GCN-DENORM-SLOWFMA: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
|
||||
|
||||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
@ -303,9 +327,9 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, flo
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}mad_sub_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGA:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGB:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGC:v[0-9]+]]
|
||||
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
|
||||
|
||||
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
|
||||
@ -317,7 +341,7 @@ define amdgpu_kernel void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, flo
|
||||
; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
|
||||
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -337,9 +361,9 @@ define amdgpu_kernel void @mad_sub_f32(float addrspace(1)* noalias nocapture %ou
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}mad_sub_inv_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGA:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGB:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGC:v[0-9]+]]
|
||||
|
||||
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
|
||||
|
||||
@ -352,7 +376,7 @@ define amdgpu_kernel void @mad_sub_f32(float addrspace(1)* noalias nocapture %ou
|
||||
; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
|
||||
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -372,9 +396,9 @@ define amdgpu_kernel void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}mad_sub_fabs_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGA:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGB:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGC:v[0-9]+]]
|
||||
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
|
||||
|
||||
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
|
||||
@ -386,7 +410,7 @@ define amdgpu_kernel void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture
|
||||
; GCN-DENORM-STRICT: v_sub_f32_e64 [[RESULT:v[0-9]+]], [[TMP]], |[[REGC]]|
|
||||
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -407,10 +431,11 @@ define amdgpu_kernel void @mad_sub_fabs_f32(float addrspace(1)* noalias nocaptur
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}mad_sub_fabs_inv_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
|
||||
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGA:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGB:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGC:v[0-9]+]]
|
||||
; GCN-FLUSH-MAD: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
|
||||
; GCN-FLUSH-FMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
|
||||
|
||||
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
|
||||
|
||||
@ -421,7 +446,7 @@ define amdgpu_kernel void @mad_sub_fabs_f32(float addrspace(1)* noalias nocaptur
|
||||
; GCN-DENORM-STRICT: v_sub_f32_e64 [[RESULT:v[0-9]+]], |[[REGC]]|, [[TMP]]
|
||||
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -442,13 +467,13 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias noca
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}neg_neg_mad_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGA:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGB:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGC:v[0-9]+]]
|
||||
|
||||
; GCN-FLUSH: v_mac_f32_e32 [[REGC]], [[REGA]], [[REGB]]
|
||||
; SI-FLUSH: buffer_store_dword [[REGC]]
|
||||
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REGC]]
|
||||
; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[REGC]]
|
||||
|
||||
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]]
|
||||
|
||||
@ -459,7 +484,7 @@ define amdgpu_kernel void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias noca
|
||||
; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]]
|
||||
|
||||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -481,9 +506,9 @@ define amdgpu_kernel void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}mad_fabs_sub_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGA:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGB:v[0-9]+]]
|
||||
; GCN: {{buffer|flat}}_load_dword [[REGC:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGA:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGB:v[0-9]+]]
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[REGC:v[0-9]+]]
|
||||
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
|
||||
|
||||
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
|
||||
@ -495,7 +520,7 @@ define amdgpu_kernel void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture
|
||||
; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]]
|
||||
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #0 {
|
||||
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tid.ext = sext i32 %tid to i64
|
||||
@ -516,11 +541,11 @@ define amdgpu_kernel void @mad_fabs_sub_f32(float addrspace(1)* noalias nocaptur
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fsub_c_fadd_a_a_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN-FLUSH: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
|
||||
; SI-FLUSH: buffer_store_dword [[R2]]
|
||||
; VI-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
; VI-FLUSH: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[R2]]
|
||||
|
||||
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
|
||||
|
||||
@ -531,7 +556,7 @@ define amdgpu_kernel void @mad_fabs_sub_f32(float addrspace(1)* noalias nocaptur
|
||||
; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
|
||||
|
||||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @fsub_c_fadd_a_a_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
@ -549,8 +574,8 @@ define amdgpu_kernel void @fsub_c_fadd_a_a_f32(float addrspace(1)* %out, float a
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}fsub_fadd_a_a_c_f32:
|
||||
; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]],
|
||||
; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]],
|
||||
; GCN-FLUSH: v_mad_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
|
||||
|
||||
; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], 2.0, -[[R2]]
|
||||
@ -562,7 +587,7 @@ define amdgpu_kernel void @fsub_c_fadd_a_a_f32(float addrspace(1)* %out, float a
|
||||
; GCN-DENORM-STRICT: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
|
||||
|
||||
; SI: buffer_store_dword [[RESULT]]
|
||||
; VI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
; VI: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
define amdgpu_kernel void @fsub_fadd_a_a_c_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||||
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
|
@ -21,6 +21,8 @@
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx902 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX902 %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX904 %s
|
||||
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX906 %s
|
||||
|
||||
; HSA: .hsa_code_object_version 2,1
|
||||
; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU"
|
||||
@ -36,3 +38,5 @@
|
||||
; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU"
|
||||
; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
|
||||
; HSA-GFX902: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
|
||||
; HSA-GFX904: .hsa_code_object_isa 9,0,4,"AMD","AMDGPU"
|
||||
; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU"
|
||||
|
19
test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
Normal file
19
test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX906
|
||||
|
||||
declare float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c)
|
||||
|
||||
; GFX906-LABEL: {{^}}test_llvm_amdgcn_fdot2
|
||||
; GFX906: v_dot2_f32_f16
|
||||
; Kernel exercising the llvm.amdgcn.fdot2 intrinsic.
; It loads two <2 x half> operands and one float operand through the
; addrspace(1) pointers %a, %b and %c, passes them to the intrinsic in that
; order, and stores the float result through %r.
; The check line preceding this definition expects v_dot2_f32_f16 to appear
; in the llc output for -mcpu=gfx906.
define amdgpu_kernel void @test_llvm_amdgcn_fdot2(
|
||||
float addrspace(1)* %r,
|
||||
<2 x half> addrspace(1)* %a,
|
||||
<2 x half> addrspace(1)* %b,
|
||||
float addrspace(1)* %c) {
|
||||
entry:
|
||||
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
|
||||
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
|
||||
%c.val = load float, float addrspace(1)* %c
|
||||
; NOTE(review): %c.val is presumably the accumulator added to the dot
; product of %a.val and %b.val - confirm against the intrinsic definition.
|
||||
%r.val = call float @llvm.amdgcn.fdot2(<2 x half> %a.val, <2 x half> %b.val, float %c.val)
|
||||
store float %r.val, float addrspace(1)* %r
|
||||
ret void
|
||||
}
|
19
test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll
Normal file
19
test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
|
||||
|
||||
declare i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c)
|
||||
|
||||
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2
|
||||
; GFX906: v_dot2_i32_i16
|
||||
; Kernel exercising the llvm.amdgcn.sdot2 intrinsic.
; It loads two <2 x i16> operands and one i32 operand through the
; addrspace(1) pointers %a, %b and %c, passes them to the intrinsic in that
; order, and stores the i32 result through %r.
; The check line preceding this definition expects v_dot2_i32_i16 to appear
; in the llc output for -mcpu=gfx906.
define amdgpu_kernel void @test_llvm_amdgcn_sdot2(
|
||||
i32 addrspace(1)* %r,
|
||||
<2 x i16> addrspace(1)* %a,
|
||||
<2 x i16> addrspace(1)* %b,
|
||||
i32 addrspace(1)* %c) {
|
||||
entry:
|
||||
%a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
|
||||
%b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
|
||||
%c.val = load i32, i32 addrspace(1)* %c
|
||||
; NOTE(review): %c.val is presumably the accumulator for a signed dot
; product of %a.val and %b.val - confirm against the intrinsic definition.
|
||||
%r.val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val)
|
||||
store i32 %r.val, i32 addrspace(1)* %r
|
||||
ret void
|
||||
}
|
21
test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
Normal file
21
test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
Normal file
@ -0,0 +1,21 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
|
||||
|
||||
declare i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c)
|
||||
|
||||
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4
|
||||
; GFX906: v_dot4_i32_i8
|
||||
; Kernel exercising the llvm.amdgcn.sdot4 intrinsic.
; It loads two <4 x i8> vectors through the addrspace(1) pointers %a and %b,
; reinterprets each as an i32, loads an i32 through %c, calls the intrinsic
; with (a, b, c), and stores the i32 result through %r.
; The check line preceding this definition expects v_dot4_i32_i8 to appear
; in the llc output for -mcpu=gfx906.
define amdgpu_kernel void @test_llvm_amdgcn_sdot4(
|
||||
i32 addrspace(1)* %r,
|
||||
<4 x i8> addrspace(1)* %a,
|
||||
<4 x i8> addrspace(1)* %b,
|
||||
i32 addrspace(1)* %c) {
|
||||
entry:
|
||||
%a.val = load <4 x i8>, <4 x i8> addrspace(1)* %a
|
||||
%b.val = load <4 x i8>, <4 x i8> addrspace(1)* %b
|
||||
; The intrinsic is declared with i32 operands, so the packed <4 x i8>
; vectors are bitcast to i32 before the call.
|
||||
%a.val.cast = bitcast <4 x i8> %a.val to i32
|
||||
%b.val.cast = bitcast <4 x i8> %b.val to i32
|
||||
%c.val = load i32, i32 addrspace(1)* %c
|
||||
%r.val = call i32 @llvm.amdgcn.sdot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val)
|
||||
store i32 %r.val, i32 addrspace(1)* %r
|
||||
ret void
|
||||
}
|
21
test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
Normal file
21
test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
Normal file
@ -0,0 +1,21 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
|
||||
|
||||
declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c)
|
||||
|
||||
; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8
|
||||
; GFX906: v_dot8_i32_i4
|
||||
; Kernel exercising the llvm.amdgcn.sdot8 intrinsic.
; It loads two <8 x i4> vectors through the addrspace(1) pointers %a and %b,
; reinterprets each as an i32, loads an i32 through %c, calls the intrinsic
; with (a, b, c), and stores the i32 result through %r.
; The check line preceding this definition expects v_dot8_i32_i4 to appear
; in the llc output for -mcpu=gfx906.
define amdgpu_kernel void @test_llvm_amdgcn_sdot8(
|
||||
i32 addrspace(1)* %r,
|
||||
<8 x i4> addrspace(1)* %a,
|
||||
<8 x i4> addrspace(1)* %b,
|
||||
i32 addrspace(1)* %c) {
|
||||
entry:
|
||||
%a.val = load <8 x i4>, <8 x i4> addrspace(1)* %a
|
||||
%b.val = load <8 x i4>, <8 x i4> addrspace(1)* %b
|
||||
; The intrinsic is declared with i32 operands, so the packed <8 x i4>
; vectors are bitcast to i32 before the call.
|
||||
%a.val.cast = bitcast <8 x i4> %a.val to i32
|
||||
%b.val.cast = bitcast <8 x i4> %b.val to i32
|
||||
%c.val = load i32, i32 addrspace(1)* %c
|
||||
%r.val = call i32 @llvm.amdgcn.sdot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val)
|
||||
store i32 %r.val, i32 addrspace(1)* %r
|
||||
ret void
|
||||
}
|
19
test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
Normal file
19
test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
|
||||
|
||||
declare i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c)
|
||||
|
||||
; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2
|
||||
; GFX906: v_dot2_u32_u16
|
||||
; Kernel exercising the llvm.amdgcn.udot2 intrinsic.
; It loads two <2 x i16> operands and one i32 operand through the
; addrspace(1) pointers %a, %b and %c, passes them to the intrinsic in that
; order, and stores the i32 result through %r.
; The check line preceding this definition expects v_dot2_u32_u16 to appear
; in the llc output for -mcpu=gfx906.
define amdgpu_kernel void @test_llvm_amdgcn_udot2(
|
||||
i32 addrspace(1)* %r,
|
||||
<2 x i16> addrspace(1)* %a,
|
||||
<2 x i16> addrspace(1)* %b,
|
||||
i32 addrspace(1)* %c) {
|
||||
entry:
|
||||
%a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
|
||||
%b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
|
||||
%c.val = load i32, i32 addrspace(1)* %c
|
||||
; NOTE(review): %c.val is presumably the accumulator for an unsigned dot
; product of %a.val and %b.val - confirm against the intrinsic definition.
|
||||
%r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val)
|
||||
store i32 %r.val, i32 addrspace(1)* %r
|
||||
ret void
|
||||
}
|
21
test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
Normal file
21
test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
Normal file
@ -0,0 +1,21 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
|
||||
|
||||
declare i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c)
|
||||
|
||||
; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4
|
||||
; GFX906: v_dot4_u32_u8
|
||||
; Kernel exercising the llvm.amdgcn.udot4 intrinsic.
; It loads two <4 x i8> vectors through the addrspace(1) pointers %a and %b,
; reinterprets each as an i32, loads an i32 through %c, calls the intrinsic
; with (a, b, c), and stores the i32 result through %r.
; The check line preceding this definition expects v_dot4_u32_u8 to appear
; in the llc output for -mcpu=gfx906.
define amdgpu_kernel void @test_llvm_amdgcn_udot4(
|
||||
i32 addrspace(1)* %r,
|
||||
<4 x i8> addrspace(1)* %a,
|
||||
<4 x i8> addrspace(1)* %b,
|
||||
i32 addrspace(1)* %c) {
|
||||
entry:
|
||||
%a.val = load <4 x i8>, <4 x i8> addrspace(1)* %a
|
||||
%b.val = load <4 x i8>, <4 x i8> addrspace(1)* %b
|
||||
; The intrinsic is declared with i32 operands, so the packed <4 x i8>
; vectors are bitcast to i32 before the call.
|
||||
%a.val.cast = bitcast <4 x i8> %a.val to i32
|
||||
%b.val.cast = bitcast <4 x i8> %b.val to i32
|
||||
%c.val = load i32, i32 addrspace(1)* %c
|
||||
%r.val = call i32 @llvm.amdgcn.udot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val)
|
||||
store i32 %r.val, i32 addrspace(1)* %r
|
||||
ret void
|
||||
}
|
21
test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
Normal file
21
test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
Normal file
@ -0,0 +1,21 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX906
|
||||
|
||||
declare i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c)
|
||||
|
||||
; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8
|
||||
; GFX906: v_dot8_u32_u4
|
||||
; Kernel exercising the llvm.amdgcn.udot8 intrinsic.
; It loads two <8 x i4> vectors through the addrspace(1) pointers %a and %b,
; reinterprets each as an i32, loads an i32 through %c, calls the intrinsic
; with (a, b, c), and stores the i32 result through %r.
; The check line preceding this definition expects v_dot8_u32_u4 to appear
; in the llc output for -mcpu=gfx906.
define amdgpu_kernel void @test_llvm_amdgcn_udot8(
|
||||
i32 addrspace(1)* %r,
|
||||
<8 x i4> addrspace(1)* %a,
|
||||
<8 x i4> addrspace(1)* %b,
|
||||
i32 addrspace(1)* %c) {
|
||||
entry:
|
||||
%a.val = load <8 x i4>, <8 x i4> addrspace(1)* %a
|
||||
%b.val = load <8 x i4>, <8 x i4> addrspace(1)* %b
|
||||
; The intrinsic is declared with i32 operands, so the packed <8 x i4>
; vectors are bitcast to i32 before the call.
|
||||
%a.val.cast = bitcast <8 x i4> %a.val to i32
|
||||
%b.val.cast = bitcast <8 x i4> %b.val to i32
|
||||
%c.val = load i32, i32 addrspace(1)* %c
|
||||
%r.val = call i32 @llvm.amdgcn.udot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val)
|
||||
store i32 %r.val, i32 addrspace(1)* %r
|
||||
ret void
|
||||
}
|
@ -1,9 +1,11 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900,GFX9 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX906,GFX9 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,CI %s
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo:
|
||||
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c]
|
||||
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c]
|
||||
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x00,0x03,0x0a,0x1c]
|
||||
; VI: v_mac_f32
|
||||
; CI: v_mad_f32
|
||||
define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
|
||||
@ -15,7 +17,8 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_int:
|
||||
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
|
||||
; CIVI: v_mac_f32
|
||||
define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 {
|
||||
%src0.hi = lshr i32 %src0, 16
|
||||
@ -35,7 +38,8 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
|
||||
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] ; encoding
|
||||
; VI: v_mac_f32
|
||||
; CI: v_mad_f32
|
||||
define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
|
||||
@ -50,9 +54,13 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_v2f32:
|
||||
; GFX9: v_mov_b32_e32 v3, v1
|
||||
; GFX9-NEXT: v_mad_mix_f32 v1, v0, v3, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v3, v2 op_sel_hi:[1,1,1]
|
||||
; GFX900: v_mov_b32_e32 v3, v1
|
||||
; GFX900-NEXT: v_mad_mix_f32 v1, v0, v3, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v3, v2 op_sel_hi:[1,1,1]
|
||||
|
||||
; GFX906: v_mov_b32_e32 v3, v1
|
||||
; GFX906-NEXT: v_fma_mix_f32 v1, v0, v3, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v3, v2 op_sel_hi:[1,1,1]
|
||||
|
||||
; CIVI: v_mac_f32
|
||||
define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
|
||||
@ -65,10 +73,15 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_v2f32_shuffle:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX9-NEXT: v_mad_mix_f32 v1, v0, v3, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v3, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX900-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX900-NEXT: v_mad_mix_f32 v1, v0, v3, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v3, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX900-NEXT: s_setpc_b64
|
||||
|
||||
; GFX906-NEXT: v_mov_b32_e32 v3, v1
|
||||
; GFX906-NEXT: v_fma_mix_f32 v1, v0, v3, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v3, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: s_setpc_b64
|
||||
|
||||
; CIVI: v_mac_f32
|
||||
define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
|
||||
@ -83,9 +96,13 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1,
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_negf16lo_f16lo_f16lo:
|
||||
; GFX9: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX900: s_waitcnt
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX900-NEXT: s_setpc_b64
|
||||
|
||||
; GFX906: s_waitcnt
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX906-NEXT: s_setpc_b64
|
||||
|
||||
; CIVI: v_mad_f32
|
||||
define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
|
||||
@ -98,7 +115,8 @@ define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %s
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_absf16lo_f16lo_f16lo:
|
||||
; GFX9: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
|
||||
; GFX900: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
|
||||
; GFX906: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
|
||||
|
||||
; CIVI: v_mad_f32
|
||||
define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
|
||||
@ -111,9 +129,13 @@ define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %s
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
|
||||
; GFX9: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
; GFX900: s_waitcnt
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
|
||||
; GFX900-NEXT: s_setpc_b64
|
||||
|
||||
; GFX906: s_waitcnt
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: s_setpc_b64
|
||||
|
||||
; CIVI: v_mad_f32
|
||||
define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
|
||||
@ -128,7 +150,8 @@ define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
|
||||
; CIVI: v_mad_f32
|
||||
@ -141,7 +164,8 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2)
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negf32:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
|
||||
; CIVI: v_mad_f32
|
||||
@ -155,7 +179,8 @@ define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %sr
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_absf32:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
|
||||
; CIVI: v_mad_f32
|
||||
@ -169,7 +194,8 @@ define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %sr
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_negabsf32:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
|
||||
; CIVI: v_mad_f32
|
||||
@ -189,7 +215,8 @@ define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imm1:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9: v_mov_b32_e32 v2, 1.0
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
|
||||
; CIVI: v_mad_f32 v0, v0, v1, 1.0
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
@ -203,7 +230,8 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9: v_mov_b32_e32 v2, 0.15915494
|
||||
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; VI: v_mad_f32 v0, v0, v1, 0.15915494
|
||||
define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 {
|
||||
%src0.ext = fpext half %src0 to float
|
||||
@ -219,7 +247,8 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0
|
||||
; f32 1/2pi = 0x3e22f983
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
|
||||
; GFX9: v_mov_b32_e32 v2, 0x3e230000
|
||||
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
|
||||
; CIVI: v_madak_f32 v0, v0, v1, 0x3e230000
|
||||
define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 {
|
||||
@ -232,7 +261,8 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1)
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
|
||||
; GFX9: v_mov_b32_e32 v2, 0x367c0000
|
||||
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
|
||||
; CIVI: v_madak_f32 v0, v0, v1, 0x367c0000
|
||||
define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
|
||||
@ -246,8 +276,11 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
|
||||
; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imm1:
|
||||
; GFX9: v_mov_b32_e32 v2, v1
|
||||
; GFX9: v_mov_b32_e32 v3, 1.0
|
||||
; GFX9: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX9: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
|
||||
|
||||
; GFX906: v_fma_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX906: v_fma_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
|
||||
define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
|
||||
%src0.ext = fpext <2 x half> %src0 to <2 x float>
|
||||
%src1.ext = fpext <2 x half> %src1 to <2 x float>
|
||||
@ -258,8 +291,11 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1)
|
||||
; GCN-LABEL: {{^}}v_mad_mix_v2f32_cvtf16imminv2pi:
|
||||
; GFX9: v_mov_b32_e32 v2, v1
|
||||
; GFX9: v_mov_b32_e32 v3, 0x3e230000
|
||||
; GFX9: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX9: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
|
||||
|
||||
; GFX906: v_fma_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX906: v_fma_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
|
||||
define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
|
||||
%src0.ext = fpext <2 x half> %src0 to <2 x float>
|
||||
%src1.ext = fpext <2 x half> %src1 to <2 x float>
|
||||
@ -271,8 +307,12 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half>
|
||||
; GCN-LABEL: {{^}}v_mad_mix_v2f32_f32imminv2pi:
|
||||
; GFX9: v_mov_b32_e32 v2, v1
|
||||
; GFX9: v_mov_b32_e32 v3, 0.15915494
|
||||
; GFX9: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX9: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
|
||||
|
||||
; GFX900: v_mad_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900: v_mad_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
|
||||
|
||||
; GFX906: v_fma_mix_f32 v1, v0, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX906: v_fma_mix_f32 v0, v0, v2, v3 op_sel_hi:[1,1,0] ; encoding
|
||||
define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
|
||||
%src0.ext = fpext <2 x half> %src0 to <2 x float>
|
||||
%src1.ext = fpext <2 x half> %src1 to <2 x float>
|
||||
@ -282,7 +322,8 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
|
||||
; GFX9: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding
|
||||
; GFX900: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding
|
||||
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp ; encoding
|
||||
; VI: v_mac_f32_e64 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
|
||||
; CI: v_mad_f32 v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}}, v{{[0-9]}} clamp{{$}}
|
||||
define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
|
||||
@ -300,7 +341,7 @@ define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x h
|
||||
|
||||
; GCN-LABEL: no_mix_simple:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: v_mad_f32 v0, v0, v1, v2
|
||||
; GCN-NEXT: v_{{mad|fma}}_f32 v0, v0, v1, v2
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
|
||||
%result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
|
||||
@ -309,7 +350,9 @@ define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
|
||||
|
||||
; GCN-LABEL: no_mix_simple_fabs:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: v_mad_f32 v0, |v0|, v1, v2
|
||||
; CIVI-NEXT: v_mad_f32 v0, |v0|, v1, v2
|
||||
; GFX900-NEXT: v_mad_f32 v0, |v0|, v1, v2
|
||||
; GFX906-NEXT: v_fma_f32 v0, v1, |v0|, v2
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
|
||||
%src0.fabs = call float @llvm.fabs.f32(float %src0)
|
||||
@ -322,10 +365,10 @@ define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
|
||||
; v_mad_mix_f32 flushes.
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
|
||||
; GFX9: v_cvt_f32_f16
|
||||
; GFX9: v_cvt_f32_f16
|
||||
; GFX9: v_cvt_f32_f16
|
||||
; GFX9: v_fma_f32
|
||||
; GFX900: v_cvt_f32_f16
|
||||
; GFX900: v_cvt_f32_f16
|
||||
; GFX900: v_cvt_f32_f16
|
||||
; GFX900: v_fma_f32
|
||||
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 {
|
||||
%src0.ext = fpext half %src0 to float
|
||||
%src1.ext = fpext half %src1 to float
|
||||
@ -335,9 +378,12 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %sr
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_denormals:
|
||||
; GFX9: v_cvt_f32_f16
|
||||
; GFX9: v_cvt_f32_f16
|
||||
; GFX9: v_fma_f32
|
||||
; GFX900: v_cvt_f32_f16
|
||||
; GFX900: v_cvt_f32_f16
|
||||
; GFX900: v_fma_f32
|
||||
|
||||
; GFX906-NOT: v_cvt_f32_f16
|
||||
; GFX906: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
|
||||
define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 {
|
||||
%src0.ext = fpext half %src0 to float
|
||||
%src1.ext = fpext half %src1 to float
|
||||
@ -375,32 +421,35 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 {
|
||||
%src0.ext = fpext half %src0 to float
|
||||
%src1.ext = fpext half %src1 to float
|
||||
%src2.ext = fpext half %src2 to float
|
||||
%mul = fmul float %src0.ext, %src1.ext
|
||||
%result = fadd float %mul, %src2.ext
|
||||
%mul = fmul contract float %src0.ext, %src1.ext
|
||||
%result = fadd contract float %mul, %src2.ext
|
||||
ret float %result
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
|
||||
; GCN: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; encoding
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 {
|
||||
%src0.ext = fpext half %src0 to float
|
||||
%src1.ext = fpext half %src1 to float
|
||||
%mul = fmul float %src0.ext, %src1.ext
|
||||
%result = fadd float %mul, %src2
|
||||
%mul = fmul contract float %src0.ext, %src1.ext
|
||||
%result = fadd contract float %mul, %src2
|
||||
ret float %result
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
|
||||
; GFX9: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; encoding
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
|
||||
; CIVI: v_mad_f32
|
||||
@ -418,7 +467,7 @@ define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1
|
||||
|
||||
; Make sure we don't fold pre-cvt fneg if we already have a fabs
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
|
||||
; GFX9: s_waitcnt
|
||||
; GFX900: s_waitcnt
|
||||
define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
|
||||
%src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
|
||||
%src0 = extractelement <2 x half> %src0.arg.bc, i32 1
|
||||
@ -433,7 +482,8 @@ define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
|
||||
; GFX9: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
|
||||
%src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
|
||||
@ -448,7 +498,8 @@ define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
|
||||
; GFX9: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
|
||||
%src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
|
||||
@ -463,7 +514,8 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
|
||||
; GFX9: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
|
||||
%src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
|
||||
@ -478,7 +530,8 @@ define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half
|
||||
|
||||
; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
|
||||
; GFX9: s_waitcnt
|
||||
; GFX9-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
|
||||
; GFX9-NEXT: s_setpc_b64
|
||||
define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
|
||||
%src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
|
||||
|
@ -2,6 +2,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX700 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX800 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX900 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-DL --check-prefix=GFX906 %s
|
||||
|
||||
; GCN-LABEL: {{^}}scalar_xnor_i32_one_use
|
||||
; GCN: s_xnor_b32
|
||||
@ -62,6 +63,7 @@ entry:
|
||||
; GCN-NOT: s_xnor_b32
|
||||
; GCN: v_xor_b32
|
||||
; GCN: v_not_b32
|
||||
; GCN-DL: v_xnor_b32
|
||||
define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
|
||||
entry:
|
||||
%xor = xor i32 %a, %b
|
||||
@ -75,6 +77,8 @@ entry:
|
||||
; GCN: v_xor_b32
|
||||
; GCN: v_not_b32
|
||||
; GCN: v_not_b32
|
||||
; GCN-DL: v_xnor_b32
|
||||
; GCN-DL: v_xnor_b32
|
||||
define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) {
|
||||
entry:
|
||||
%xor = xor i64 %a, %b
|
||||
|
387
test/MC/AMDGPU/dl-insts-err.s
Normal file
387
test/MC/AMDGPU/dl-insts-err.s
Normal file
@ -0,0 +1,387 @@
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx800 -show-encoding %s 2>&1 | FileCheck %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=GFX906
|
||||
|
||||
//
|
||||
// Test unsupported GPUs.
|
||||
//
|
||||
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
v_fmac_f32 v0, v1, v2
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
v_xnor_b32 v0, v1, v2
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
v_dot2_f32_f16 v0, v1, v2, v3
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
v_dot2_i32_i16 v0, v1, v2, v3
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
v_dot2_u32_u16 v0, v1, v2, v3
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
v_dot4_i32_i8 v0, v1, v2, v3
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
v_dot4_u32_u8 v0, v1, v2, v3
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
v_dot8_i32_i4 v0, v1, v2, v3
|
||||
// CHECK: error: instruction not supported on this GPU
|
||||
v_dot8_u32_u4 v0, v1, v2, v3
|
||||
|
||||
//
|
||||
// Test invalid operands.
|
||||
//
|
||||
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[,]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[2,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[2,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,-1]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[-1,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[-1,-1]
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0,0,0,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[,]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[2,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[2,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,-1]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[-1,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[-1,-1]
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,0,0,0,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[,]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[2,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[2,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,-1]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[-1,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[-1,-1]
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0,0,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[,]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[0,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[2,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[2,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[0,-1]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[-1,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[-1,-1]
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[0,0,0,0,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[,]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[2,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[2,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,-1]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[-1,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[-1,-1]
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0,0,0,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[,]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[2,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[2,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,-1]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[-1,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[-1,-1]
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,0,0,0,0]
|
||||
// FIXME-GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 neg_lo:[0,0]
|
||||
// FIXME-GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 neg_hi:[0,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[,]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[2,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[2,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,-1]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[-1,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[-1,-1]
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0,0,0,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[,]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[2,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[2,2]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,-1]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[-1,0]
|
||||
// GFX906: error: failed parsing operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[-1,-1]
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,0,0,0,0]
|
||||
// FIXME-GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 neg_lo:[0,0]
|
||||
// FIXME-GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 neg_hi:[0,0]
|
||||
|
||||
//
|
||||
// Test regular modifiers.
|
||||
//
|
||||
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_f32_f16 v0, |v1|, v2, v3
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_f32_f16 v0, v1, |v2|, v3
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_f32_f16 v0, v1, v2, |v3|
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_f32_f16 v0, |v1|, |v2|, v3
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_f32_f16 v0, |v1|, v2, |v3|
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_f32_f16 v0, |v1|, |v2|, |v3|
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_f32_f16 v0, abs(v1), v2, v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_f32_f16 v0, v1, abs(v2), v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_f32_f16 v0, v1, v2, abs(v3)
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_f32_f16 v0, abs(v1), abs(v2), v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_f32_f16 v0, abs(v1), v2, abs(v3)
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_f32_f16 v0, abs(v1), abs(v2), abs(v3)
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_f32_f16 v0, -v1, v2, v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_f32_f16 v0, v1, -v2, v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_f32_f16 v0, v1, v2, -v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_f32_f16 v0, -v1, -v2, v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_f32_f16 v0, -v1, v2, -v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_f32_f16 v0, -v1, -v2, -v3
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_i32_i16 v0, |v1|, v2, v3
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_i32_i16 v0, v1, |v2|, v3
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_i32_i16 v0, v1, v2, |v3|
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_i32_i16 v0, |v1|, |v2|, v3
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_i32_i16 v0, |v1|, v2, |v3|
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_i32_i16 v0, |v1|, |v2|, |v3|
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, abs(v1), v2, v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, v1, abs(v2), v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, v1, v2, abs(v3)
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, abs(v1), abs(v2), v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, abs(v1), v2, abs(v3)
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, abs(v1), abs(v2), abs(v3)
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, -v1, v2, v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, v1, -v2, v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, v1, v2, -v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, -v1, -v2, v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, -v1, v2, -v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_i32_i16 v0, -v1, -v2, -v3
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_u32_u16 v0, |v1|, v2, v3
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_u32_u16 v0, v1, |v2|, v3
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_u32_u16 v0, v1, v2, |v3|
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_u32_u16 v0, |v1|, |v2|, v3
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_u32_u16 v0, |v1|, v2, |v3|
|
||||
// GFX906: error: not a valid operand
|
||||
v_dot2_u32_u16 v0, |v1|, |v2|, |v3|
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, abs(v1), v2, v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, v1, abs(v2), v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, v1, v2, abs(v3)
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, abs(v1), abs(v2), v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, abs(v1), v2, abs(v3)
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, abs(v1), abs(v2), abs(v3)
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, -v1, v2, v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, v1, -v2, v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, v1, v2, -v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, -v1, -v2, v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, -v1, v2, -v3
|
||||
// GFX906: error: invalid operand for instruction
|
||||
v_dot2_u32_u16 v0, -v1, -v2, -v3
|
||||
|
||||
//
|
||||
// Test constant bus restrictions.
|
||||
//
|
||||
|
||||
// GFX906: error: invalid operand (violates constant bus restrictions)
|
||||
v_dot2_f32_f16 v255, s1, s2, s3
|
||||
// GFX906: error: invalid operand (violates constant bus restrictions)
|
||||
v_dot2_i32_i16 v255, s1, s2, s3
|
||||
// GFX906: error: invalid operand (violates constant bus restrictions)
|
||||
v_dot2_u32_u16 v255, s1, s2, s3
|
679
test/MC/AMDGPU/dl-insts.s
Normal file
679
test/MC/AMDGPU/dl-insts.s
Normal file
@ -0,0 +1,679 @@
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s | FileCheck %s
|
||||
|
||||
//
|
||||
// VOP2 Instructions.
|
||||
//
|
||||
|
||||
// CHECK: encoding: [0x01,0x05,0x0a,0x76]
|
||||
v_fmac_f32 v5, v1, v2
|
||||
// CHECK: encoding: [0x01,0x05,0xfe,0x77]
|
||||
v_fmac_f32 v255, v1, v2
|
||||
// CHECK: encoding: [0xff,0x05,0x0a,0x76]
|
||||
v_fmac_f32 v5, v255, v2
|
||||
// CHECK: encoding: [0x01,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, s1, v2
|
||||
// CHECK: encoding: [0x65,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, s101, v2
|
||||
// CHECK: encoding: [0x66,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, flat_scratch_lo, v2
|
||||
// CHECK: encoding: [0x67,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, flat_scratch_hi, v2
|
||||
// CHECK: encoding: [0x6a,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, vcc_lo, v2
|
||||
// CHECK: encoding: [0x6b,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, vcc_hi, v2
|
||||
// CHECK: encoding: [0x7c,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, m0, v2
|
||||
// CHECK: encoding: [0x7e,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, exec_lo, v2
|
||||
// CHECK: encoding: [0x7f,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, exec_hi, v2
|
||||
// CHECK: encoding: [0x80,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, 0, v2
|
||||
// CHECK: encoding: [0xc1,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, -1, v2
|
||||
// CHECK: encoding: [0xf0,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, 0.5, v2
|
||||
// CHECK: encoding: [0xf7,0x04,0x0a,0x76]
|
||||
v_fmac_f32 v5, -4.0, v2
|
||||
// CHECK: encoding: [0xff,0x04,0x0a,0x76,0x56,0x34,0x12,0xaf]
|
||||
v_fmac_f32 v5, 0xaf123456, v2
|
||||
// CHECK: encoding: [0xff,0x04,0x0a,0x76,0x73,0x72,0x71,0x3f]
|
||||
v_fmac_f32 v5, 0x3f717273, v2
|
||||
// CHECK: encoding: [0x01,0xff,0x0b,0x76]
|
||||
v_fmac_f32 v5, v1, v255
|
||||
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0x05,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, v1, v2
|
||||
// CHECK: encoding: [0xff,0x00,0x3b,0xd1,0x01,0x05,0x02,0x00]
|
||||
v_fmac_f32_e64 v255, v1, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0xff,0x05,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, v255, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, s1, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x65,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, s101, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x66,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, flat_scratch_lo, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x67,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, flat_scratch_hi, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x6a,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, vcc_lo, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x6b,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, vcc_hi, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x7c,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, m0, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x7e,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, exec_lo, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x7f,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, exec_hi, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x80,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, 0, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0xc1,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, -1, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0xf0,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, 0.5, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0xf7,0x04,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, -4.0, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0xff,0x03,0x00]
|
||||
v_fmac_f32_e64 v5, v1, v255
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0x05,0x00,0x00]
|
||||
v_fmac_f32_e64 v5, v1, s2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0xcb,0x00,0x00]
|
||||
v_fmac_f32_e64 v5, v1, s101
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0xcd,0x00,0x00]
|
||||
v_fmac_f32_e64 v5, v1, flat_scratch_lo
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0xcf,0x00,0x00]
|
||||
v_fmac_f32_e64 v5, v1, flat_scratch_hi
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0xd5,0x00,0x00]
|
||||
v_fmac_f32_e64 v5, v1, vcc_lo
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0xd7,0x00,0x00]
|
||||
v_fmac_f32_e64 v5, v1, vcc_hi
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0xf9,0x00,0x00]
|
||||
v_fmac_f32_e64 v5, v1, m0
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0xfd,0x00,0x00]
|
||||
v_fmac_f32_e64 v5, v1, exec_lo
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0xff,0x00,0x00]
|
||||
v_fmac_f32_e64 v5, v1, exec_hi
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0x01,0x01,0x00]
|
||||
v_fmac_f32_e64 v5, v1, 0
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0x83,0x01,0x00]
|
||||
v_fmac_f32_e64 v5, v1, -1
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0xe1,0x01,0x00]
|
||||
v_fmac_f32_e64 v5, v1, 0.5
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0xef,0x01,0x00]
|
||||
v_fmac_f32_e64 v5, v1, -4.0
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0x05,0x02,0x20]
|
||||
v_fmac_f32_e64 v5, -v1, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0x05,0x02,0x40]
|
||||
v_fmac_f32_e64 v5, v1, -v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0x05,0x02,0x60]
|
||||
v_fmac_f32_e64 v5, -v1, -v2
|
||||
// CHECK: encoding: [0x05,0x01,0x3b,0xd1,0x01,0x05,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, |v1|, v2
|
||||
// CHECK: encoding: [0x05,0x02,0x3b,0xd1,0x01,0x05,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, v1, |v2|
|
||||
// CHECK: encoding: [0x05,0x03,0x3b,0xd1,0x01,0x05,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, |v1|, |v2|
|
||||
// CHECK: encoding: [0x05,0x80,0x3b,0xd1,0x01,0x05,0x02,0x00]
|
||||
v_fmac_f32_e64 v5, v1, v2 clamp
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0x05,0x02,0x08]
|
||||
v_fmac_f32_e64 v5, v1, v2 mul:2
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0x05,0x02,0x10]
|
||||
v_fmac_f32_e64 v5, v1, v2 mul:4
|
||||
// CHECK: encoding: [0x05,0x00,0x3b,0xd1,0x01,0x05,0x02,0x18]
|
||||
v_fmac_f32_e64 v5, v1, v2 div:2
|
||||
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0xfe,0x77,0x01,0xe4,0x00,0x00]
|
||||
v_fmac_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0xff,0xe4,0x00,0x00]
|
||||
v_fmac_f32_dpp v5, v255, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0xfe,0x0b,0x76,0x01,0xe4,0x00,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v255 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 row_mirror row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 row_half_mirror row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x42,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 row_bcast:15 row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x43,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 row_bcast:31 row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x30,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 wave_shl:1 row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x34,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 wave_rol:1 row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x38,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 wave_shr:1 row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x3c,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 wave_ror:1 row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 row_shl:1 row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 row_shl:15 row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 row_shr:1 row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 row_shr:15 row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 row_ror:1 row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 row_ror:15 row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0x10]
|
||||
v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x1 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0x30]
|
||||
v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xf0]
|
||||
v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xf0]
|
||||
v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0x01]
|
||||
v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0x03]
|
||||
v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x3
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0x0f]
|
||||
v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0xf
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0x0f]
|
||||
v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x08,0x00]
|
||||
v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x10,0x00]
|
||||
v_fmac_f32_dpp v5, -v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x20,0x00]
|
||||
v_fmac_f32_dpp v5, |v1|, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x40,0x00]
|
||||
v_fmac_f32_dpp v5, v1, -v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// CHECK: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x80,0x00]
|
||||
v_fmac_f32_dpp v5, v1, |v2| quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
|
||||
// CHECK: encoding: [0x01,0x05,0x0a,0x7a]
|
||||
v_xnor_b32 v5, v1, v2
|
||||
// CHECK: encoding: [0x01,0x05,0xfe,0x7b]
|
||||
v_xnor_b32 v255, v1, v2
|
||||
// CHECK: encoding: [0xff,0x05,0x0a,0x7a]
|
||||
v_xnor_b32 v5, v255, v2
|
||||
// CHECK: encoding: [0x01,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, s1, v2
|
||||
// CHECK: encoding: [0x65,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, s101, v2
|
||||
// CHECK: encoding: [0x66,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, flat_scratch_lo, v2
|
||||
// CHECK: encoding: [0x67,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, flat_scratch_hi, v2
|
||||
// CHECK: encoding: [0x6a,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, vcc_lo, v2
|
||||
// CHECK: encoding: [0x6b,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, vcc_hi, v2
|
||||
// CHECK: encoding: [0x7c,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, m0, v2
|
||||
// CHECK: encoding: [0x7e,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, exec_lo, v2
|
||||
// CHECK: encoding: [0x7f,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, exec_hi, v2
|
||||
// CHECK: encoding: [0x80,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, 0, v2
|
||||
// CHECK: encoding: [0xc1,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, -1, v2
|
||||
// CHECK: encoding: [0xf0,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, 0.5, v2
|
||||
// CHECK: encoding: [0xf7,0x04,0x0a,0x7a]
|
||||
v_xnor_b32 v5, -4.0, v2
|
||||
// CHECK: encoding: [0xff,0x04,0x0a,0x7a,0x56,0x34,0x12,0xaf]
|
||||
v_xnor_b32 v5, 0xaf123456, v2
|
||||
// CHECK: encoding: [0xff,0x04,0x0a,0x7a,0x73,0x72,0x71,0x3f]
|
||||
v_xnor_b32 v5, 0x3f717273, v2
|
||||
// CHECK: encoding: [0x01,0xff,0x0b,0x7a]
|
||||
v_xnor_b32 v5, v1, v255
|
||||
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0x05,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, v1, v2
|
||||
// CHECK: encoding: [0xff,0x00,0x3d,0xd1,0x01,0x05,0x02,0x00]
|
||||
v_xnor_b32_e64 v255, v1, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0xff,0x05,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, v255, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, s1, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x65,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, s101, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x66,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, flat_scratch_lo, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x67,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, flat_scratch_hi, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x6a,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, vcc_lo, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x6b,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, vcc_hi, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x7c,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, m0, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x7e,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, exec_lo, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x7f,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, exec_hi, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x80,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, 0, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0xc1,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, -1, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0xf0,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, 0.5, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0xf7,0x04,0x02,0x00]
|
||||
v_xnor_b32_e64 v5, -4.0, v2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0xff,0x03,0x00]
|
||||
v_xnor_b32_e64 v5, v1, v255
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0x05,0x00,0x00]
|
||||
v_xnor_b32_e64 v5, v1, s2
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0xcb,0x00,0x00]
|
||||
v_xnor_b32_e64 v5, v1, s101
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0xcd,0x00,0x00]
|
||||
v_xnor_b32_e64 v5, v1, flat_scratch_lo
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0xcf,0x00,0x00]
|
||||
v_xnor_b32_e64 v5, v1, flat_scratch_hi
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0xd5,0x00,0x00]
|
||||
v_xnor_b32_e64 v5, v1, vcc_lo
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0xd7,0x00,0x00]
|
||||
v_xnor_b32_e64 v5, v1, vcc_hi
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0xf9,0x00,0x00]
|
||||
v_xnor_b32_e64 v5, v1, m0
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0xfd,0x00,0x00]
|
||||
v_xnor_b32_e64 v5, v1, exec_lo
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0xff,0x00,0x00]
|
||||
v_xnor_b32_e64 v5, v1, exec_hi
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0x01,0x01,0x00]
|
||||
v_xnor_b32_e64 v5, v1, 0
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0x83,0x01,0x00]
|
||||
v_xnor_b32_e64 v5, v1, -1
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0xe1,0x01,0x00]
|
||||
v_xnor_b32_e64 v5, v1, 0.5
|
||||
// CHECK: encoding: [0x05,0x00,0x3d,0xd1,0x01,0xef,0x01,0x00]
|
||||
v_xnor_b32_e64 v5, v1, -4.0
|
||||
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0xfe,0x7b,0x01,0x06,0x06,0x06]
|
||||
v_xnor_b32_sdwa v255, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0xff,0x06,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v255, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x86,0x06]
|
||||
v_xnor_b32_sdwa v5, s1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x65,0x06,0x86,0x06]
|
||||
v_xnor_b32_sdwa v5, s101, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x66,0x06,0x86,0x06]
|
||||
v_xnor_b32_sdwa v5, flat_scratch_lo, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x67,0x06,0x86,0x06]
|
||||
v_xnor_b32_sdwa v5, flat_scratch_hi, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x6a,0x06,0x86,0x06]
|
||||
v_xnor_b32_sdwa v5, vcc_lo, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x6b,0x06,0x86,0x06]
|
||||
v_xnor_b32_sdwa v5, vcc_hi, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x7c,0x06,0x86,0x06]
|
||||
v_xnor_b32_sdwa v5, m0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x7e,0x06,0x86,0x06]
|
||||
v_xnor_b32_sdwa v5, exec_lo, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x7f,0x06,0x86,0x06]
|
||||
v_xnor_b32_sdwa v5, exec_hi, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0xfe,0x0b,0x7a,0x01,0x06,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v255 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x00,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x01,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x02,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:BYTE_2 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x03,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x04,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:WORD_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x05,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x0e,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_SEXT src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x16,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x16,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x00,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x01,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x02,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x03,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x04,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x05,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x0e,0x06]
|
||||
v_xnor_b32_sdwa v5, sext(v1), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x06,0x06]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x06,0x00]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x06,0x01]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x06,0x02]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x06,0x03]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x06,0x04]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x06,0x05]
|
||||
v_xnor_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
|
||||
// CHECK: encoding: [0xf9,0x04,0x0a,0x7a,0x01,0x06,0x06,0x0e]
|
||||
v_xnor_b32_sdwa v5, v1, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
|
||||
|
||||
//
|
||||
// VOP3P Instructions.
|
||||
//
|
||||
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3
|
||||
// CHECK: encoding: [0x00,0x40,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3
|
||||
// CHECK: encoding: [0x00,0x40,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3
|
||||
// CHECK: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot4_i32_i8 v0, v1, v2, v3
|
||||
// CHECK: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot4_u32_u8 v0, v1, v2, v3
|
||||
// CHECK: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot8_i32_i4 v0, v1, v2, v3
|
||||
// CHECK: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot8_u32_u4 v0, v1, v2, v3
|
||||
|
||||
//
|
||||
// Test op_sel/op_sel_hi.
|
||||
//
|
||||
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0]
|
||||
// CHECK: encoding: [0x00,0x50,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1]
|
||||
// CHECK: encoding: [0x00,0x48,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0]
|
||||
// CHECK: encoding: [0x00,0x58,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1]
|
||||
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x00,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x10,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x08,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x18,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x40,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0]
|
||||
// CHECK: encoding: [0x00,0x50,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1]
|
||||
// CHECK: encoding: [0x00,0x48,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0]
|
||||
// CHECK: encoding: [0x00,0x58,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1]
|
||||
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x00,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x10,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x08,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x18,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x40,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0]
|
||||
// CHECK: encoding: [0x00,0x50,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1]
|
||||
// CHECK: encoding: [0x00,0x48,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0]
|
||||
// CHECK: encoding: [0x00,0x58,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1]
|
||||
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x00,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x10,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x08,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
|
||||
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x04]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
|
||||
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x14]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
|
||||
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
|
||||
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
|
||||
|
||||
//
|
||||
// Test clamp.
|
||||
//
|
||||
|
||||
// CHECK: encoding: [0x00,0xc0,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 clamp
|
||||
// CHECK: encoding: [0x00,0xc0,0xa6,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_i32_i16 v0, v1, v2, v3 clamp
|
||||
// CHECK: encoding: [0x00,0xc0,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_u32_u16 v0, v1, v2, v3 clamp
|
||||
|
||||
//
|
||||
// Test neg_lo/neg_hi.
|
||||
//
|
||||
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x3c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,0]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x5c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,1,0]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x9c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,1]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x7c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0xbc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,1]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0xfc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,1]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[0,0,0]
|
||||
// CHECK: encoding: [0x00,0x41,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[1,0,0]
|
||||
// CHECK: encoding: [0x00,0x42,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[0,1,0]
|
||||
// CHECK: encoding: [0x00,0x44,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[0,0,1]
|
||||
// CHECK: encoding: [0x00,0x43,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[1,1,0]
|
||||
// CHECK: encoding: [0x00,0x45,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[1,0,1]
|
||||
// CHECK: encoding: [0x00,0x47,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_hi:[1,1,1]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0] neg_hi:[0,0,0]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x3c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,0] neg_hi:[0,0,0]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x5c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,1,0] neg_hi:[0,0,0]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x9c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,1] neg_hi:[0,0,0]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0x7c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[0,0,0]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0xbc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,1] neg_hi:[0,0,0]
|
||||
// CHECK: encoding: [0x00,0x40,0xa3,0xd3,0x01,0x05,0x0e,0xfc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,1] neg_hi:[0,0,0]
|
||||
// CHECK: encoding: [0x00,0x41,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0] neg_hi:[1,0,0]
|
||||
// CHECK: encoding: [0x00,0x41,0xa3,0xd3,0x01,0x05,0x0e,0x3c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,0] neg_hi:[1,0,0]
|
||||
// CHECK: encoding: [0x00,0x41,0xa3,0xd3,0x01,0x05,0x0e,0x5c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,1,0] neg_hi:[1,0,0]
|
||||
// CHECK: encoding: [0x00,0x41,0xa3,0xd3,0x01,0x05,0x0e,0x9c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,1] neg_hi:[1,0,0]
|
||||
// CHECK: encoding: [0x00,0x41,0xa3,0xd3,0x01,0x05,0x0e,0x7c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,0]
|
||||
// CHECK: encoding: [0x00,0x41,0xa3,0xd3,0x01,0x05,0x0e,0xbc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,1] neg_hi:[1,0,0]
|
||||
// CHECK: encoding: [0x00,0x41,0xa3,0xd3,0x01,0x05,0x0e,0xfc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,1] neg_hi:[1,0,0]
|
||||
// CHECK: encoding: [0x00,0x42,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0] neg_hi:[0,1,0]
|
||||
// CHECK: encoding: [0x00,0x42,0xa3,0xd3,0x01,0x05,0x0e,0x3c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,0] neg_hi:[0,1,0]
|
||||
// CHECK: encoding: [0x00,0x42,0xa3,0xd3,0x01,0x05,0x0e,0x5c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,1,0] neg_hi:[0,1,0]
|
||||
// CHECK: encoding: [0x00,0x42,0xa3,0xd3,0x01,0x05,0x0e,0x9c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,1] neg_hi:[0,1,0]
|
||||
// CHECK: encoding: [0x00,0x42,0xa3,0xd3,0x01,0x05,0x0e,0x7c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[0,1,0]
|
||||
// CHECK: encoding: [0x00,0x42,0xa3,0xd3,0x01,0x05,0x0e,0xbc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,1] neg_hi:[0,1,0]
|
||||
// CHECK: encoding: [0x00,0x42,0xa3,0xd3,0x01,0x05,0x0e,0xfc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,1] neg_hi:[0,1,0]
|
||||
// CHECK: encoding: [0x00,0x44,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0] neg_hi:[0,0,1]
|
||||
// CHECK: encoding: [0x00,0x44,0xa3,0xd3,0x01,0x05,0x0e,0x3c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,0] neg_hi:[0,0,1]
|
||||
// CHECK: encoding: [0x00,0x44,0xa3,0xd3,0x01,0x05,0x0e,0x5c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,1,0] neg_hi:[0,0,1]
|
||||
// CHECK: encoding: [0x00,0x44,0xa3,0xd3,0x01,0x05,0x0e,0x9c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,1] neg_hi:[0,0,1]
|
||||
// CHECK: encoding: [0x00,0x44,0xa3,0xd3,0x01,0x05,0x0e,0x7c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[0,0,1]
|
||||
// CHECK: encoding: [0x00,0x44,0xa3,0xd3,0x01,0x05,0x0e,0xbc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,1] neg_hi:[0,0,1]
|
||||
// CHECK: encoding: [0x00,0x44,0xa3,0xd3,0x01,0x05,0x0e,0xfc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,1] neg_hi:[0,0,1]
|
||||
// CHECK: encoding: [0x00,0x43,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0] neg_hi:[1,1,0]
|
||||
// CHECK: encoding: [0x00,0x43,0xa3,0xd3,0x01,0x05,0x0e,0x3c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,0] neg_hi:[1,1,0]
|
||||
// CHECK: encoding: [0x00,0x43,0xa3,0xd3,0x01,0x05,0x0e,0x5c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,1,0] neg_hi:[1,1,0]
|
||||
// CHECK: encoding: [0x00,0x43,0xa3,0xd3,0x01,0x05,0x0e,0x9c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,1] neg_hi:[1,1,0]
|
||||
// CHECK: encoding: [0x00,0x43,0xa3,0xd3,0x01,0x05,0x0e,0x7c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,1,0]
|
||||
// CHECK: encoding: [0x00,0x43,0xa3,0xd3,0x01,0x05,0x0e,0xbc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,1] neg_hi:[1,1,0]
|
||||
// CHECK: encoding: [0x00,0x43,0xa3,0xd3,0x01,0x05,0x0e,0xfc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,1] neg_hi:[1,1,0]
|
||||
// CHECK: encoding: [0x00,0x45,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0] neg_hi:[1,0,1]
|
||||
// CHECK: encoding: [0x00,0x45,0xa3,0xd3,0x01,0x05,0x0e,0x3c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,0] neg_hi:[1,0,1]
|
||||
// CHECK: encoding: [0x00,0x45,0xa3,0xd3,0x01,0x05,0x0e,0x5c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,1,0] neg_hi:[1,0,1]
|
||||
// CHECK: encoding: [0x00,0x45,0xa3,0xd3,0x01,0x05,0x0e,0x9c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,1] neg_hi:[1,0,1]
|
||||
// CHECK: encoding: [0x00,0x45,0xa3,0xd3,0x01,0x05,0x0e,0x7c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1]
|
||||
// CHECK: encoding: [0x00,0x45,0xa3,0xd3,0x01,0x05,0x0e,0xbc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,1] neg_hi:[1,0,1]
|
||||
// CHECK: encoding: [0x00,0x45,0xa3,0xd3,0x01,0x05,0x0e,0xfc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,1] neg_hi:[1,0,1]
|
||||
// CHECK: encoding: [0x00,0x47,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,0] neg_hi:[1,1,1]
|
||||
// CHECK: encoding: [0x00,0x47,0xa3,0xd3,0x01,0x05,0x0e,0x3c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,0] neg_hi:[1,1,1]
|
||||
// CHECK: encoding: [0x00,0x47,0xa3,0xd3,0x01,0x05,0x0e,0x5c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,1,0] neg_hi:[1,1,1]
|
||||
// CHECK: encoding: [0x00,0x47,0xa3,0xd3,0x01,0x05,0x0e,0x9c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[0,0,1] neg_hi:[1,1,1]
|
||||
// CHECK: encoding: [0x00,0x47,0xa3,0xd3,0x01,0x05,0x0e,0x7c]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,1,1]
|
||||
// CHECK: encoding: [0x00,0x47,0xa3,0xd3,0x01,0x05,0x0e,0xbc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,0,1] neg_hi:[1,1,1]
|
||||
// CHECK: encoding: [0x00,0x47,0xa3,0xd3,0x01,0x05,0x0e,0xfc]
|
||||
v_dot2_f32_f16 v0, v1, v2, v3 neg_lo:[1,1,1] neg_hi:[1,1,1]
|
102
test/MC/AMDGPU/fma-mix.s
Normal file
102
test/MC/AMDGPU/fma-mix.s
Normal file
@ -0,0 +1,102 @@
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s | FileCheck -check-prefix=GFX9-FMAMIX %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9-MADMIX-ERR %s
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, v3
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
// GFX9-MADMIX-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_fma_mixlo_f16 v0, v1, v2, v3
|
||||
// GFX9-FMAMIX: v_fma_mixlo_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa1,0xd3,0x01,0x05,0x0e,0x04]
|
||||
// GFX9-MADMIX-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_fma_mixhi_f16 v0, v1, v2, v3
|
||||
// GFX9-FMAMIX: v_fma_mixhi_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa2,0xd3,0x01,0x05,0x0e,0x04]
|
||||
// GFX9-MADMIX-ERR: error: instruction not supported on this GPU
|
||||
|
||||
//
|
||||
// Regular source modifiers on non-packed instructions
|
||||
//
|
||||
|
||||
v_fma_mix_f32 v0, abs(v1), v2, v3
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
// FIXME: Better error
|
||||
// GFX9-MADMIX-ERR: error: invalid operand for instruction
|
||||
|
||||
v_fma_mix_f32 v0, v1, abs(v2), v3
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, abs(v3)
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_fma_mix_f32 v0, -v1, v2, v3
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x24]
|
||||
|
||||
v_fma_mix_f32 v0, v1, -v2, v3
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x44]
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, -v3
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x84]
|
||||
|
||||
v_fma_mix_f32 v0, -abs(v1), v2, v3
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, -|v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x24]
|
||||
|
||||
v_fma_mix_f32 v0, v1, -abs(v2), v3
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, -|v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x44]
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, -abs(v3)
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, -|v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x84]
|
||||
|
||||
v_fma_mixlo_f16 v0, abs(v1), -v2, abs(v3)
|
||||
// GFX9-FMAMIX: v_fma_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0xa1,0xd3,0x01,0x05,0x0e,0x44]
|
||||
|
||||
v_fma_mixhi_f16 v0, -v1, abs(v2), -abs(v3)
|
||||
// GFX9-FMAMIX: v_fma_mixhi_f16 v0, -v1, |v2|, -|v3| ; encoding: [0x00,0x06,0xa2,0xd3,0x01,0x05,0x0e,0xa4]
|
||||
|
||||
v_fma_mixlo_f16 v0, v1, v2, v3 clamp
|
||||
// GFX9-FMAMIX: v_fma_mixlo_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa1,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_fma_mixhi_f16 v0, v1, v2, v3 clamp
|
||||
// GFX9-FMAMIX: v_fma_mixhi_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa2,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
//
|
||||
// op_sel with non-packed instructions
|
||||
//
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0]
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
// FIXME: Better error
|
||||
// GFX9-MADMIX-ERR: error: unknown token in expression
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0]
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x08,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0]
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x10,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1]
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x00,0x20,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1]
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x00,0x38,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, v3
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0]
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0]
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x14]
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1]
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1]
|
||||
// GFX9-FMAMIX: v_fma_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
|
||||
v_fma_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp
|
||||
// GFX9-FMAMIX: v_fma_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa1,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
|
||||
v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp
|
||||
// GFX9-FMAMIX: v_fma_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa2,0xd3,0x01,0x05,0x0e,0x0c]
|
102
test/MC/AMDGPU/mad-mix.s
Normal file
102
test/MC/AMDGPU/mad-mix.s
Normal file
@ -0,0 +1,102 @@
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9-MADMIX %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX9-FMAMIX-ERR %s
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_mad_mixlo_f16 v0, v1, v2, v3
|
||||
// GFX9-MADMIX: v_mad_mixlo_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa1,0xd3,0x01,0x05,0x0e,0x04]
|
||||
// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_mad_mixhi_f16 v0, v1, v2, v3
|
||||
// GFX9-MADMIX: v_mad_mixhi_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa2,0xd3,0x01,0x05,0x0e,0x04]
|
||||
// GFX9-FMAMIX-ERR: error: instruction not supported on this GPU
|
||||
|
||||
//
|
||||
// Regular source modifiers on non-packed instructions
|
||||
//
|
||||
|
||||
v_mad_mix_f32 v0, abs(v1), v2, v3
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
// FIXME: Better error
|
||||
// GFX9-FMAMIX-ERR: error: invalid operand for instruction
|
||||
|
||||
v_mad_mix_f32 v0, v1, abs(v2), v3
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, abs(v3)
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, -v1, v2, v3
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x24]
|
||||
|
||||
v_mad_mix_f32 v0, v1, -v2, v3
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x44]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, -v3
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x84]
|
||||
|
||||
v_mad_mix_f32 v0, -abs(v1), v2, v3
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, -|v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x24]
|
||||
|
||||
v_mad_mix_f32 v0, v1, -abs(v2), v3
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, -|v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x44]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, -abs(v3)
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, -|v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x84]
|
||||
|
||||
v_mad_mixlo_f16 v0, abs(v1), -v2, abs(v3)
|
||||
// GFX9-MADMIX: v_mad_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0xa1,0xd3,0x01,0x05,0x0e,0x44]
|
||||
|
||||
v_mad_mixhi_f16 v0, -v1, abs(v2), -abs(v3)
|
||||
// GFX9-MADMIX: v_mad_mixhi_f16 v0, -v1, |v2|, -|v3| ; encoding: [0x00,0x06,0xa2,0xd3,0x01,0x05,0x0e,0xa4]
|
||||
|
||||
v_mad_mixlo_f16 v0, v1, v2, v3 clamp
|
||||
// GFX9-MADMIX: v_mad_mixlo_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa1,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mixhi_f16 v0, v1, v2, v3 clamp
|
||||
// GFX9-MADMIX: v_mad_mixhi_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa2,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
//
|
||||
// op_sel with non-packed instructions
|
||||
//
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0]
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
// FIXME: Better error
|
||||
// GFX9-FMAMIX-ERR: error: unknown token in expression
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0]
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x08,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0]
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x10,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1]
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x00,0x20,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1]
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x00,0x38,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0]
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0]
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x14]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1]
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1]
|
||||
// GFX9-MADMIX: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
|
||||
v_mad_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp
|
||||
// GFX9-MADMIX: v_mad_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa1,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
|
||||
v_mad_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp
|
||||
// GFX9-MADMIX: v_mad_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa2,0xd3,0x01,0x05,0x0e,0x0c]
|
@ -1,5 +1,4 @@
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
//
|
||||
// Test op_sel/op_sel_hi
|
||||
//
|
||||
@ -167,95 +166,3 @@ v_pk_min_f16 v0, v1, v2
|
||||
|
||||
v_pk_max_f16 v0, v1, v2
|
||||
// GFX9: v_pk_max_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x92,0xd3,0x01,0x05,0x02,0x18]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mixlo_f16 v0, v1, v2, v3
|
||||
// GFX9: v_mad_mixlo_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa1,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mixhi_f16 v0, v1, v2, v3
|
||||
// GFX9: v_mad_mixhi_f16 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa2,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
//
|
||||
// Regular source modifiers on non-packed instructions
|
||||
//
|
||||
|
||||
v_mad_mix_f32 v0, abs(v1), v2, v3
|
||||
// GFX9: v_mad_mix_f32 v0, |v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, abs(v2), v3
|
||||
// GFX9: v_mad_mix_f32 v0, v1, |v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, abs(v3)
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, |v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, -v1, v2, v3
|
||||
// GFX9: v_mad_mix_f32 v0, -v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x24]
|
||||
|
||||
v_mad_mix_f32 v0, v1, -v2, v3
|
||||
// GFX9: v_mad_mix_f32 v0, v1, -v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x44]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, -v3
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, -v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x84]
|
||||
|
||||
v_mad_mix_f32 v0, -abs(v1), v2, v3
|
||||
// GFX9: v_mad_mix_f32 v0, -|v1|, v2, v3 ; encoding: [0x00,0x01,0xa0,0xd3,0x01,0x05,0x0e,0x24]
|
||||
|
||||
v_mad_mix_f32 v0, v1, -abs(v2), v3
|
||||
// GFX9: v_mad_mix_f32 v0, v1, -|v2|, v3 ; encoding: [0x00,0x02,0xa0,0xd3,0x01,0x05,0x0e,0x44]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, -abs(v3)
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, -|v3| ; encoding: [0x00,0x04,0xa0,0xd3,0x01,0x05,0x0e,0x84]
|
||||
|
||||
v_mad_mixlo_f16 v0, abs(v1), -v2, abs(v3)
|
||||
// GFX9: v_mad_mixlo_f16 v0, |v1|, -v2, |v3| ; encoding: [0x00,0x05,0xa1,0xd3,0x01,0x05,0x0e,0x44]
|
||||
|
||||
v_mad_mixhi_f16 v0, -v1, abs(v2), -abs(v3)
|
||||
// GFX9: v_mad_mixhi_f16 v0, -v1, |v2|, -|v3| ; encoding: [0x00,0x06,0xa2,0xd3,0x01,0x05,0x0e,0xa4]
|
||||
|
||||
v_mad_mixlo_f16 v0, v1, v2, v3 clamp
|
||||
// GFX9: v_mad_mixlo_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa1,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mixhi_f16 v0, v1, v2, v3 clamp
|
||||
// GFX9: v_mad_mixhi_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0xa2,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
//
|
||||
// op_sel with non-packed instructions
|
||||
//
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,0]
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0]
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x08,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0]
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x10,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1]
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x00,0x20,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1]
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 op_sel:[1,1,1] ; encoding: [0x00,0x38,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,0]
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0]
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0]
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x14]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1]
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[0,0,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
|
||||
v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1]
|
||||
// GFX9: v_mad_mix_f32 v0, v1, v2, v3 op_sel_hi:[1,1,1] ; encoding: [0x00,0x40,0xa0,0xd3,0x01,0x05,0x0e,0x1c]
|
||||
|
||||
v_mad_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp
|
||||
// GFX9: v_mad_mixlo_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa1,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
|
||||
v_mad_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp
|
||||
// GFX9: v_mad_mixhi_f16 v0, v1, v2, v3 op_sel_hi:[1,0,1] clamp ; encoding: [0x00,0xc0,0xa2,0xd3,0x01,0x05,0x0e,0x0c]
|
||||
|
6
test/MC/Disassembler/AMDGPU/mad_mix.txt
Normal file
6
test/MC/Disassembler/AMDGPU/mad_mix.txt
Normal file
@ -0,0 +1,6 @@
|
||||
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX900
|
||||
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx906 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX906
|
||||
|
||||
# GFX900: v_mad_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
# GFX906: v_fma_mix_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04]
|
||||
0x00,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04
|
@ -85,6 +85,13 @@
|
||||
# RUN: yaml2obj -docnum=29 %s > %t.o.29
|
||||
# RUN: llvm-readobj -s -file-headers %t.o.29 | FileCheck --check-prefixes=ELF-ALL,ELF-GFX902 %s
|
||||
# RUN: obj2yaml %t.o.29 | FileCheck --check-prefixes=YAML-GFX902 %s
|
||||
# RUN: yaml2obj -docnum=30 %s > %t.o.30
|
||||
# RUN: llvm-readobj -s -file-headers %t.o.30 | FileCheck --check-prefixes=ELF-ALL,ELF-GFX904 %s
|
||||
# RUN: obj2yaml %t.o.30 | FileCheck --check-prefixes=YAML-GFX904 %s
|
||||
# RUN: yaml2obj -docnum=31 %s > %t.o.31
|
||||
# RUN: llvm-readobj -s -file-headers %t.o.31 | FileCheck --check-prefixes=ELF-ALL,ELF-GFX906 %s
|
||||
# RUN: obj2yaml %t.o.31 | FileCheck --check-prefixes=YAML-GFX906 %s
|
||||
|
||||
|
||||
# ELF-ALL: Flags [
|
||||
# ELF-R600: EF_AMDGPU_MACH_R600_R600 (0x1)
|
||||
@ -116,6 +123,8 @@
|
||||
# ELF-GFX810: EF_AMDGPU_MACH_AMDGCN_GFX810 (0x2B)
|
||||
# ELF-GFX900: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
|
||||
# ELF-GFX902: EF_AMDGPU_MACH_AMDGCN_GFX902 (0x2D)
|
||||
# ELF-GFX904: EF_AMDGPU_MACH_AMDGCN_GFX904 (0x2E)
|
||||
# ELF-GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
|
||||
# ELF-ALL: ]
|
||||
|
||||
# YAML-R600: Flags: [ EF_AMDGPU_MACH_R600_R600 ]
|
||||
@ -147,6 +156,8 @@
|
||||
# YAML-GFX810: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX810 ]
|
||||
# YAML-GFX900: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900 ]
|
||||
# YAML-GFX902: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX902 ]
|
||||
# YAML-GFX904: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX904 ]
|
||||
# YAML-GFX906: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX906 ]
|
||||
|
||||
# Doc1
|
||||
--- !ELF
|
||||
@ -466,3 +477,25 @@ FileHeader:
|
||||
Machine: EM_AMDGPU
|
||||
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX902 ]
|
||||
...
|
||||
|
||||
# Doc30
|
||||
--- !ELF
|
||||
FileHeader:
|
||||
Class: ELFCLASS64
|
||||
Data: ELFDATA2LSB
|
||||
OSABI: ELFOSABI_NONE
|
||||
Type: ET_REL
|
||||
Machine: EM_AMDGPU
|
||||
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX904 ]
|
||||
...
|
||||
|
||||
# Doc31
|
||||
--- !ELF
|
||||
FileHeader:
|
||||
Class: ELFCLASS64
|
||||
Data: ELFDATA2LSB
|
||||
OSABI: ELFOSABI_NONE
|
||||
Type: ET_REL
|
||||
Machine: EM_AMDGPU
|
||||
Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX906 ]
|
||||
...
|
||||
|
@ -1292,6 +1292,8 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlags[] = {
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX810),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX900),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
|
||||
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK)
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user