mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[AMDGPU] SDWA: several fixes for V_CVT and VOPC instructions
Summary: 1. The V_CVT_U32_F32 instruction allows an omod operand (see SIInstrInfo.td:1435), although in fact this operand shouldn't be allowed here. This fix checks whether the SDWA pseudo instruction has an OMod operand and copies it only in that case. 2. There were several problems with the support of VOPC instructions in the SDWA peephole pass. Reviewers: tstellar, arsenm, vpykhtin, airlied, kzhuravl Subscribers: wdng, nhaehnle, yaxunl, dstuttard, tpr, sarnex, t-tye Differential Revision: https://reviews.llvm.org/D34626 llvm-svn: 306413
This commit is contained in:
parent
e0219a8c24
commit
48e96ee80f
@ -262,8 +262,8 @@ def FeatureSDWAMac : SubtargetFeature<"sdwa-mav",
|
||||
"Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension"
|
||||
>;
|
||||
|
||||
def FeatureSDWAClampVOPC : SubtargetFeature<"sdwa-clamp-vopc",
|
||||
"HasSDWAClampVOPC",
|
||||
def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc",
|
||||
"HasSDWAOutModsVOPC",
|
||||
"true",
|
||||
"Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension"
|
||||
>;
|
||||
@ -452,7 +452,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
|
||||
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
|
||||
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
|
||||
FeatureScalarStores, FeatureInv2PiInlineImm,
|
||||
FeatureSDWA, FeatureSDWAClampVOPC, FeatureSDWAMac, FeatureDPP
|
||||
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP
|
||||
]
|
||||
>;
|
||||
|
||||
|
@ -128,7 +128,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
HasSDWAScalar(false),
|
||||
HasSDWASdst(false),
|
||||
HasSDWAMac(false),
|
||||
HasSDWAClampVOPC(false),
|
||||
HasSDWAOutModsVOPC(false),
|
||||
HasDPP(false),
|
||||
FlatAddressSpace(false),
|
||||
FlatInstOffsets(false),
|
||||
|
@ -153,7 +153,7 @@ protected:
|
||||
bool HasSDWAScalar;
|
||||
bool HasSDWASdst;
|
||||
bool HasSDWAMac;
|
||||
bool HasSDWAClampVOPC;
|
||||
bool HasSDWAOutModsVOPC;
|
||||
bool HasDPP;
|
||||
bool FlatAddressSpace;
|
||||
bool FlatInstOffsets;
|
||||
@ -452,8 +452,8 @@ public:
|
||||
return HasSDWAMac;
|
||||
}
|
||||
|
||||
bool hasSDWAClampVOPC() const {
|
||||
return HasSDWAClampVOPC;
|
||||
bool hasSDWAOutModsVOPC() const {
|
||||
return HasSDWAOutModsVOPC;
|
||||
}
|
||||
|
||||
/// \brief Returns the offset in bytes from the start of the input buffer
|
||||
|
@ -626,7 +626,9 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
|
||||
using namespace AMDGPU::SDWA;
|
||||
|
||||
if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
|
||||
if (SDWA9EncValues::SRC_VGPR_MIN <= Val &&
|
||||
// XXX: static_cast<int> is needed to avoid stupid warning:
|
||||
// compare with unsigned is always true
|
||||
if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
|
||||
Val <= SDWA9EncValues::SRC_VGPR_MAX) {
|
||||
return createRegOperand(getVgprClassId(Width),
|
||||
Val - SDWA9EncValues::SRC_VGPR_MIN);
|
||||
|
@ -2444,8 +2444,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
|
||||
}
|
||||
|
||||
int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
|
||||
if ( DstIdx == -1)
|
||||
DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::sdst);
|
||||
|
||||
const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
|
||||
|
||||
@ -2488,14 +2486,20 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
|
||||
ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
|
||||
return false;
|
||||
}
|
||||
} else if (!ST.hasSDWAClampVOPC()) {
|
||||
} else if (!ST.hasSDWAOutModsVOPC()) {
|
||||
// No clamp allowed on GFX9 for VOPC
|
||||
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
|
||||
if (Clamp != nullptr &&
|
||||
(!Clamp->isImm() || Clamp->getImm() != 0)) {
|
||||
if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
|
||||
ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
|
||||
return false;
|
||||
}
|
||||
|
||||
// No omod allowed on GFX9 for VOPC
|
||||
const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
|
||||
if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
|
||||
ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -627,10 +627,13 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ST.hasSDWAClampVOPC() && TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
|
||||
if (!ST.hasSDWAOutModsVOPC() &&
|
||||
(TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
|
||||
TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
|
||||
return false;
|
||||
|
||||
} else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
|
||||
} else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
|
||||
!TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -649,25 +652,24 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
|
||||
SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
|
||||
assert(SDWAOpcode != -1);
|
||||
|
||||
// Copy dst, if it is present in original then should also be present in SDWA
|
||||
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
|
||||
if (!Dst && !TII->isVOPC(MI))
|
||||
return false;
|
||||
|
||||
const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
|
||||
|
||||
// Create SDWA version of instruction MI and initialize its operands
|
||||
MachineInstrBuilder SDWAInst =
|
||||
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
|
||||
|
||||
// Copy dst, if it is present in original then should also be present in SDWA
|
||||
MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
|
||||
if (Dst) {
|
||||
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
|
||||
SDWAInst.add(*Dst);
|
||||
} else {
|
||||
Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
|
||||
} else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
|
||||
assert(Dst &&
|
||||
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
|
||||
SDWAInst.add(*Dst);
|
||||
} else {
|
||||
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
|
||||
SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
|
||||
}
|
||||
|
||||
// Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
|
||||
@ -714,20 +716,22 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
|
||||
}
|
||||
|
||||
// Copy omod if present, initialize otherwise if needed
|
||||
MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
|
||||
if (OMod) {
|
||||
assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1);
|
||||
SDWAInst.add(*OMod);
|
||||
} else if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
|
||||
SDWAInst.addImm(0);
|
||||
if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
|
||||
MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
|
||||
if (OMod) {
|
||||
SDWAInst.add(*OMod);
|
||||
} else {
|
||||
SDWAInst.addImm(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize dst_sel and dst_unused if present
|
||||
if (Dst) {
|
||||
assert(
|
||||
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 &&
|
||||
AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1);
|
||||
// Initialize dst_sel if present
|
||||
if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
|
||||
SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
|
||||
}
|
||||
|
||||
// Initialize dst_unused if present
|
||||
if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
|
||||
SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
|
||||
}
|
||||
|
||||
|
446
test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
Normal file
446
test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
Normal file
@ -0,0 +1,446 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=VI -check-prefix=GFX89 -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 -check-prefix=GFX89 -check-prefix=GCN %s
|
||||
|
||||
# GFX89-LABEL: {{^}}name: vop1_instructions
|
||||
|
||||
# GFX89: %{{[0-9]+}} = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
|
||||
# GFX89: %{{[0-9]+}} = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
|
||||
# GFX89: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
|
||||
# GFX89: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
|
||||
# GFX89: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
|
||||
|
||||
|
||||
# GFX89: %{{[0-9]+}} = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 6, 0, 5, implicit %exec
|
||||
# GFX89: %{{[0-9]+}} = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
|
||||
# GFX89: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
|
||||
# GFX89: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
|
||||
# GFX89: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
|
||||
|
||||
|
||||
# VI: %{{[0-9]+}} = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_CVT_F32_I32_e64 %{{[0-9]+}}, 0, 1, implicit %exec
|
||||
|
||||
# GFX9: %{{[0-9]+}} = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit %exec
|
||||
|
||||
|
||||
---
|
||||
name: vop1_instructions
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vreg_64 }
|
||||
- { id: 2, class: sreg_64 }
|
||||
- { id: 3, class: vgpr_32 }
|
||||
- { id: 4, class: sreg_32_xm0 }
|
||||
- { id: 5, class: sreg_32_xm0 }
|
||||
- { id: 6, class: sreg_32_xm0 }
|
||||
- { id: 7, class: sreg_32_xm0 }
|
||||
- { id: 8, class: sreg_32 }
|
||||
- { id: 9, class: vgpr_32 }
|
||||
- { id: 10, class: vgpr_32 }
|
||||
- { id: 11, class: vgpr_32 }
|
||||
- { id: 12, class: vgpr_32 }
|
||||
- { id: 13, class: vgpr_32 }
|
||||
- { id: 14, class: vgpr_32 }
|
||||
- { id: 15, class: vgpr_32 }
|
||||
- { id: 16, class: vgpr_32 }
|
||||
- { id: 17, class: vgpr_32 }
|
||||
- { id: 18, class: vgpr_32 }
|
||||
- { id: 19, class: vgpr_32 }
|
||||
- { id: 20, class: vgpr_32 }
|
||||
- { id: 21, class: vgpr_32 }
|
||||
- { id: 22, class: vgpr_32 }
|
||||
- { id: 23, class: vgpr_32 }
|
||||
- { id: 24, class: vgpr_32 }
|
||||
- { id: 25, class: vgpr_32 }
|
||||
- { id: 26, class: vgpr_32 }
|
||||
- { id: 27, class: vgpr_32 }
|
||||
- { id: 28, class: vgpr_32 }
|
||||
- { id: 29, class: vgpr_32 }
|
||||
- { id: 30, class: vgpr_32 }
|
||||
- { id: 31, class: vgpr_32 }
|
||||
- { id: 32, class: vgpr_32 }
|
||||
- { id: 33, class: vgpr_32 }
|
||||
- { id: 34, class: vgpr_32 }
|
||||
- { id: 35, class: vgpr_32 }
|
||||
- { id: 36, class: vgpr_32 }
|
||||
- { id: 37, class: vgpr_32 }
|
||||
- { id: 38, class: vgpr_32 }
|
||||
- { id: 39, class: vgpr_32 }
|
||||
- { id: 40, class: vgpr_32 }
|
||||
- { id: 41, class: vgpr_32 }
|
||||
- { id: 42, class: vgpr_32 }
|
||||
- { id: 43, class: vgpr_32 }
|
||||
- { id: 44, class: vgpr_32 }
|
||||
- { id: 45, class: vgpr_32 }
|
||||
- { id: 46, class: vgpr_32 }
|
||||
- { id: 47, class: vgpr_32 }
|
||||
- { id: 48, class: vgpr_32 }
|
||||
- { id: 100, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
|
||||
|
||||
%2 = COPY %sgpr30_sgpr31
|
||||
%1 = COPY %vgpr2_vgpr3
|
||||
%0 = COPY %vgpr0_vgpr1
|
||||
%3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
|
||||
|
||||
%5 = S_MOV_B32 65535
|
||||
%6 = S_MOV_B32 65535
|
||||
|
||||
%10 = V_LSHRREV_B32_e64 16, %3, implicit %exec
|
||||
%11 = V_MOV_B32_e32 %10, implicit %exec
|
||||
%12 = V_LSHLREV_B32_e64 16, %11, implicit %exec
|
||||
%14 = V_FRACT_F32_e32 123, implicit %exec
|
||||
%15 = V_LSHLREV_B32_e64 16, %14, implicit %exec
|
||||
%16 = V_LSHRREV_B32_e64 16, %15, implicit %exec
|
||||
%17 = V_SIN_F32_e32 %16, implicit %exec
|
||||
%18 = V_LSHLREV_B32_e64 16, %17, implicit %exec
|
||||
%19 = V_LSHRREV_B32_e64 16, %18, implicit %exec
|
||||
%20 = V_CVT_U32_F32_e32 %19, implicit %exec
|
||||
%21 = V_LSHLREV_B32_e64 16, %20, implicit %exec
|
||||
%23 = V_CVT_F32_I32_e32 123, implicit %exec
|
||||
%24 = V_LSHLREV_B32_e64 16, %23, implicit %exec
|
||||
|
||||
%25 = V_LSHRREV_B32_e64 16, %3, implicit %exec
|
||||
%26 = V_MOV_B32_e64 %25, implicit %exec
|
||||
%26 = V_LSHLREV_B32_e64 16, %26, implicit %exec
|
||||
%27 = V_FRACT_F32_e64 0, %6, 0, 0, implicit %exec
|
||||
%28 = V_LSHLREV_B32_e64 16, %27, implicit %exec
|
||||
%29 = V_LSHRREV_B32_e64 16, %28, implicit %exec
|
||||
%30 = V_SIN_F32_e64 0, %29, 0, 0, implicit %exec
|
||||
%31 = V_LSHLREV_B32_e64 16, %30, implicit %exec
|
||||
%32 = V_LSHRREV_B32_e64 16, %31, implicit %exec
|
||||
%33 = V_CVT_U32_F32_e64 0, %32, 0, 0, implicit %exec
|
||||
%34 = V_LSHLREV_B32_e64 16, %33, implicit %exec
|
||||
%35 = V_CVT_F32_I32_e64 %6, 0, 0, implicit %exec
|
||||
%36 = V_LSHLREV_B32_e64 16, %35, implicit %exec
|
||||
|
||||
|
||||
%37 = V_LSHRREV_B32_e64 16, %36, implicit %exec
|
||||
%38 = V_FRACT_F32_e64 1, %37, 0, 0, implicit %exec
|
||||
%39 = V_LSHLREV_B32_e64 16, %38, implicit %exec
|
||||
%40 = V_LSHRREV_B32_e64 16, %39, implicit %exec
|
||||
%41 = V_SIN_F32_e64 0, %40, 1, 0, implicit %exec
|
||||
%42 = V_LSHLREV_B32_e64 16, %41, implicit %exec
|
||||
%43 = V_LSHRREV_B32_e64 16, %42, implicit %exec
|
||||
%44 = V_CVT_U32_F32_e64 1, %43, 0, 0, implicit %exec
|
||||
%45 = V_LSHLREV_B32_e64 16, %44, implicit %exec
|
||||
%46 = V_LSHRREV_B32_e64 16, %45, implicit %exec
|
||||
%47 = V_CVT_F32_I32_e64 %46, 0, 1, implicit %exec
|
||||
%48 = V_LSHLREV_B32_e64 16, %47, implicit %exec
|
||||
|
||||
|
||||
%100 = V_MOV_B32_e32 %48, implicit %exec
|
||||
|
||||
FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
|
||||
%sgpr30_sgpr31 = COPY %2
|
||||
S_SETPC_B64_return %sgpr30_sgpr31
|
||||
|
||||
...
|
||||
---
|
||||
# GCN-LABEL: {{^}}name: vop2_instructions
|
||||
|
||||
|
||||
# VI: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
|
||||
|
||||
# GFX9: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_MAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_MAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec
|
||||
|
||||
|
||||
# VI: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
|
||||
|
||||
# GFX9: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_MAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_MAC_F16_e64 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit %exec
|
||||
|
||||
|
||||
# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, %{{[0-9]+}}, 1, 0, 6, 0, 6, 1, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit %exec
|
||||
|
||||
# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_MAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit %exec
|
||||
|
||||
name: vop2_instructions
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vreg_64 }
|
||||
- { id: 2, class: sreg_64 }
|
||||
- { id: 3, class: vgpr_32 }
|
||||
- { id: 4, class: sreg_32_xm0 }
|
||||
- { id: 5, class: sreg_32_xm0 }
|
||||
- { id: 6, class: sreg_32_xm0 }
|
||||
- { id: 7, class: sreg_32_xm0 }
|
||||
- { id: 8, class: sreg_32 }
|
||||
- { id: 9, class: vgpr_32 }
|
||||
- { id: 10, class: vgpr_32 }
|
||||
- { id: 11, class: vgpr_32 }
|
||||
- { id: 12, class: vgpr_32 }
|
||||
- { id: 13, class: vgpr_32 }
|
||||
- { id: 14, class: vgpr_32 }
|
||||
- { id: 15, class: vgpr_32 }
|
||||
- { id: 16, class: vgpr_32 }
|
||||
- { id: 17, class: vgpr_32 }
|
||||
- { id: 18, class: vgpr_32 }
|
||||
- { id: 19, class: vgpr_32 }
|
||||
- { id: 20, class: vgpr_32 }
|
||||
- { id: 21, class: vgpr_32 }
|
||||
- { id: 22, class: vgpr_32 }
|
||||
- { id: 23, class: vgpr_32 }
|
||||
- { id: 24, class: vgpr_32 }
|
||||
- { id: 25, class: vgpr_32 }
|
||||
- { id: 26, class: vgpr_32 }
|
||||
- { id: 27, class: vgpr_32 }
|
||||
- { id: 28, class: vgpr_32 }
|
||||
- { id: 29, class: vgpr_32 }
|
||||
- { id: 30, class: vgpr_32 }
|
||||
- { id: 31, class: vgpr_32 }
|
||||
- { id: 32, class: vgpr_32 }
|
||||
- { id: 33, class: vgpr_32 }
|
||||
- { id: 34, class: vgpr_32 }
|
||||
- { id: 35, class: vgpr_32 }
|
||||
- { id: 36, class: vgpr_32 }
|
||||
- { id: 37, class: vgpr_32 }
|
||||
- { id: 38, class: vgpr_32 }
|
||||
- { id: 39, class: vgpr_32 }
|
||||
- { id: 40, class: vgpr_32 }
|
||||
- { id: 41, class: vgpr_32 }
|
||||
- { id: 42, class: vgpr_32 }
|
||||
- { id: 43, class: vgpr_32 }
|
||||
- { id: 44, class: vgpr_32 }
|
||||
- { id: 45, class: vgpr_32 }
|
||||
- { id: 46, class: vgpr_32 }
|
||||
- { id: 47, class: vgpr_32 }
|
||||
- { id: 48, class: vgpr_32 }
|
||||
- { id: 49, class: vgpr_32 }
|
||||
- { id: 50, class: vgpr_32 }
|
||||
- { id: 51, class: vgpr_32 }
|
||||
- { id: 52, class: vgpr_32 }
|
||||
- { id: 53, class: vgpr_32 }
|
||||
- { id: 54, class: vgpr_32 }
|
||||
- { id: 55, class: vgpr_32 }
|
||||
- { id: 56, class: vgpr_32 }
|
||||
- { id: 57, class: vgpr_32 }
|
||||
- { id: 58, class: vgpr_32 }
|
||||
- { id: 59, class: vgpr_32 }
|
||||
- { id: 60, class: vgpr_32 }
|
||||
- { id: 100, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
|
||||
|
||||
%2 = COPY %sgpr30_sgpr31
|
||||
%1 = COPY %vgpr2_vgpr3
|
||||
%0 = COPY %vgpr0_vgpr1
|
||||
%3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
|
||||
|
||||
%5 = S_MOV_B32 65535
|
||||
%6 = S_MOV_B32 65535
|
||||
|
||||
%11 = V_LSHRREV_B32_e64 16, %3, implicit %exec
|
||||
%12 = V_AND_B32_e32 %6, %11, implicit %exec
|
||||
%13 = V_LSHLREV_B32_e64 16, %12, implicit %exec
|
||||
%14 = V_LSHRREV_B32_e64 16, %13, implicit %exec
|
||||
%15 = V_BFE_U32 %13, 8, 8, implicit %exec
|
||||
%16 = V_ADD_F32_e32 %14, %15, implicit %exec
|
||||
%17 = V_LSHLREV_B32_e64 16, %16, implicit %exec
|
||||
%18 = V_LSHRREV_B32_e64 16, %17, implicit %exec
|
||||
%19 = V_BFE_U32 %17, 8, 8, implicit %exec
|
||||
%20 = V_SUB_F16_e32 %18, %19, implicit %exec
|
||||
%21 = V_LSHLREV_B32_e64 16, %20, implicit %exec
|
||||
%22 = V_BFE_U32 %20, 8, 8, implicit %exec
|
||||
%23 = V_MAC_F32_e32 %21, %22, %22, implicit %exec
|
||||
%24 = V_LSHLREV_B32_e64 16, %23, implicit %exec
|
||||
%25 = V_LSHRREV_B32_e64 16, %24, implicit %exec
|
||||
%26 = V_BFE_U32 %24, 8, 8, implicit %exec
|
||||
%27 = V_MAC_F16_e32 %25, %26, %26, implicit %exec
|
||||
%28 = V_LSHLREV_B32_e64 16, %27, implicit %exec
|
||||
|
||||
%29 = V_LSHRREV_B32_e64 16, %28, implicit %exec
|
||||
%30 = V_AND_B32_e64 23, %29, implicit %exec
|
||||
%31 = V_LSHLREV_B32_e64 16, %30, implicit %exec
|
||||
%32 = V_LSHRREV_B32_e64 16, %31, implicit %exec
|
||||
%33 = V_BFE_U32 %31, 8, 8, implicit %exec
|
||||
%34 = V_ADD_F32_e64 0, %32, 0, %33, 0, 0, implicit %exec
|
||||
%35 = V_LSHLREV_B32_e64 16, %34, implicit %exec
|
||||
%37 = V_BFE_U32 %35, 8, 8, implicit %exec
|
||||
%38 = V_SUB_F16_e64 0, 23, 0, %37, 0, 0, implicit %exec
|
||||
%39 = V_LSHLREV_B32_e64 16, %38, implicit %exec
|
||||
%40 = V_BFE_U32 %39, 8, 8, implicit %exec
|
||||
%41 = V_MAC_F32_e64 0, 23, 0, %40, 0, %40, 0, 0, implicit %exec
|
||||
%42 = V_LSHLREV_B32_e64 16, %41, implicit %exec
|
||||
%43 = V_LSHRREV_B32_e64 16, %42, implicit %exec
|
||||
%44 = V_BFE_U32 %42, 8, 8, implicit %exec
|
||||
%45 = V_MAC_F16_e64 0, %43, 0, %44, 0, %44, 0, 0, implicit %exec
|
||||
%46 = V_LSHLREV_B32_e64 16, %45, implicit %exec
|
||||
|
||||
%47 = V_LSHRREV_B32_e64 16, %46, implicit %exec
|
||||
%48 = V_BFE_U32 %46, 8, 8, implicit %exec
|
||||
%49 = V_ADD_F32_e64 0, %47, 1, %48, 0, 0, implicit %exec
|
||||
%50 = V_LSHLREV_B32_e64 16, %49, implicit %exec
|
||||
%51 = V_BFE_U32 %50, 8, 8, implicit %exec
|
||||
%52 = V_SUB_F16_e64 1, 23, 1, %51, 0, 0, implicit %exec
|
||||
%53 = V_LSHLREV_B32_e64 16, %52, implicit %exec
|
||||
%54 = V_BFE_U32 %53, 8, 8, implicit %exec
|
||||
%55 = V_MAC_F32_e64 1, 23, 1, %54, 1, %54, 1, 0, implicit %exec
|
||||
%56 = V_LSHLREV_B32_e64 16, %55, implicit %exec
|
||||
%57 = V_LSHRREV_B32_e64 16, %56, implicit %exec
|
||||
%58 = V_BFE_U32 %56, 8, 8, implicit %exec
|
||||
%59 = V_MAC_F16_e64 1, %57, 1, %58, 1, %58, 0, 2, implicit %exec
|
||||
%60 = V_LSHLREV_B32_e64 16, %59, implicit %exec
|
||||
|
||||
%100 = V_MOV_B32_e32 %60, implicit %exec
|
||||
|
||||
FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
|
||||
%sgpr30_sgpr31 = COPY %2
|
||||
S_SETPC_B64_return %sgpr30_sgpr31
|
||||
|
||||
...
|
||||
---
|
||||
|
||||
# GCN-LABEL: {{^}}name: vopc_instructions
|
||||
|
||||
# GFX89: %{{[0-9]+}} = V_MOV_B32_e32 123, implicit %exec
|
||||
# GFX89: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
|
||||
# GFX89: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
|
||||
# GFX89: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
|
||||
# GFX89: %vcc = V_CMPX_EQ_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
|
||||
|
||||
|
||||
# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 0, implicit-def %exec, implicit %exec
|
||||
# VI: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %3, 0, 6, 4, implicit-def %vcc, implicit %exec
|
||||
# VI: %{{[0-9]+}} = V_CMPX_EQ_I32_e64 23, killed %{{[0-9]+}}, implicit-def %exec, implicit %exec
|
||||
|
||||
# GFX9: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_MOV_B32_e32 23, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
|
||||
# GFX9: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_MOV_B32_e32 23, implicit %exec
|
||||
# GFX9: %{{[0-9]+}} = V_CMPX_EQ_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
|
||||
|
||||
|
||||
# VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit %exec
|
||||
# VI: %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 2, implicit-def %exec, implicit %exec
|
||||
# VI: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 2, implicit %exec
|
||||
# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
|
||||
# VI: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
|
||||
# VI: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
|
||||
# VI: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, 2, implicit-def %exec, implicit %exec
|
||||
|
||||
# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 0, implicit %exec
|
||||
# GFX9: %vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, 2, implicit-def %exec, implicit %exec
|
||||
# GFX9: %vcc = V_CMP_EQ_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 1, 2, implicit %exec
|
||||
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
|
||||
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
|
||||
# GFX9: %vcc = V_CMPX_GT_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
|
||||
# GFX9: %vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %{{[0-9]+}}, 1, 2, implicit-def %exec, implicit %exec
|
||||
|
||||
|
||||
name: vopc_instructions
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: vreg_64 }
|
||||
- { id: 1, class: vreg_64 }
|
||||
- { id: 2, class: sreg_64 }
|
||||
- { id: 3, class: vgpr_32 }
|
||||
- { id: 4, class: sreg_32_xm0 }
|
||||
- { id: 5, class: sreg_32_xm0 }
|
||||
- { id: 6, class: sreg_32_xm0 }
|
||||
- { id: 7, class: sreg_32_xm0 }
|
||||
- { id: 8, class: sreg_32 }
|
||||
- { id: 9, class: vgpr_32 }
|
||||
- { id: 10, class: vgpr_32 }
|
||||
- { id: 11, class: vgpr_32 }
|
||||
- { id: 12, class: vgpr_32 }
|
||||
- { id: 13, class: vgpr_32 }
|
||||
- { id: 14, class: vgpr_32 }
|
||||
- { id: 15, class: vgpr_32 }
|
||||
- { id: 16, class: vgpr_32 }
|
||||
- { id: 17, class: vgpr_32 }
|
||||
- { id: 18, class: sreg_64 }
|
||||
- { id: 19, class: sreg_64 }
|
||||
- { id: 20, class: vgpr_32 }
|
||||
- { id: 21, class: vgpr_32 }
|
||||
- { id: 22, class: vgpr_32 }
|
||||
- { id: 23, class: vgpr_32 }
|
||||
- { id: 24, class: vgpr_32 }
|
||||
- { id: 25, class: vgpr_32 }
|
||||
- { id: 26, class: vgpr_32 }
|
||||
- { id: 27, class: vgpr_32 }
|
||||
- { id: 100, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
|
||||
|
||||
%2 = COPY %sgpr30_sgpr31
|
||||
%1 = COPY %vgpr2_vgpr3
|
||||
%0 = COPY %vgpr0_vgpr1
|
||||
%3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4)
|
||||
|
||||
%5 = S_MOV_B32 65535
|
||||
%6 = S_MOV_B32 65535
|
||||
|
||||
%10 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
V_CMP_EQ_F32_e32 123, killed %10, implicit-def %vcc, implicit %exec
|
||||
%11 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
V_CMPX_GT_F32_e32 123, killed %11, implicit-def %vcc, implicit-def %exec, implicit %exec
|
||||
%12 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
V_CMP_LT_I32_e32 123, killed %12, implicit-def %vcc, implicit %exec
|
||||
%13 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
V_CMPX_EQ_I32_e32 123, killed %13, implicit-def %vcc, implicit-def %exec, implicit %exec
|
||||
|
||||
%14 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
%vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %14, 0, 0, implicit %exec
|
||||
%15 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
%18 = V_CMPX_GT_F32_e64 0, 23, 0, killed %15, 0, 0, implicit-def %exec, implicit %exec
|
||||
%16 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
%vcc = V_CMP_LT_I32_e64 %6, killed %16, implicit %exec
|
||||
%17 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
%19 = V_CMPX_EQ_I32_e64 23, killed %17, implicit-def %exec, implicit %exec
|
||||
|
||||
%20 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
%vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %20, 1, 0, implicit %exec
|
||||
%21 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
%vcc = V_CMPX_GT_F32_e64 0, 23, 0, killed %21, 0, 2, implicit-def %exec, implicit %exec
|
||||
%23 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
%vcc = V_CMP_EQ_F32_e64 0, %6, 0, killed %23, 1, 2, implicit %exec
|
||||
%24 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
%vcc = V_CMPX_GT_F32_e64 1, 23, 0, killed %24, 0, 0, implicit-def %exec, implicit %exec
|
||||
%25 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
%vcc = V_CMPX_GT_F32_e64 0, 23, 1, killed %25, 0, 0, implicit-def %exec, implicit %exec
|
||||
%26 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
%vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %26, 0, 0, implicit-def %exec, implicit %exec
|
||||
%27 = V_AND_B32_e64 %5, %3, implicit %exec
|
||||
%vcc = V_CMPX_GT_F32_e64 1, 23, 1, killed %27, 1, 2, implicit-def %exec, implicit %exec
|
||||
|
||||
|
||||
%100 = V_MOV_B32_e32 %vcc_lo, implicit %exec
|
||||
|
||||
FLAT_STORE_DWORD %0, %100, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4)
|
||||
%sgpr30_sgpr31 = COPY %2
|
||||
S_SETPC_B64_return %sgpr30_sgpr31
|
@ -66,7 +66,7 @@ define amdgpu_kernel void @v_select_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}select_v4i8:
|
||||
; GCN: v_cndmask_b32_e32
|
||||
; GCN: v_cndmask_b32
|
||||
; GCN-NOT: cndmask
|
||||
define amdgpu_kernel void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) #0 {
|
||||
%cmp = icmp eq i8 %c, 0
|
||||
|
Loading…
Reference in New Issue
Block a user