mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[AMDGPU] gfx1010 core wave32 changes
Differential Revision: https://reviews.llvm.org/D63204 llvm-svn: 363934
This commit is contained in:
parent
222088eed6
commit
2653a95667
@ -777,7 +777,7 @@ def FeatureISAVersion10_1_0 : FeatureSet<
|
||||
FeatureLDSBankCount32,
|
||||
FeatureDLInsts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureWavefrontSize64,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureScalarStores,
|
||||
FeatureScalarAtomics,
|
||||
FeatureScalarFlatScratchInsts,
|
||||
@ -795,7 +795,7 @@ def FeatureISAVersion10_1_1 : FeatureSet<
|
||||
FeatureDot5Insts,
|
||||
FeatureDot6Insts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureWavefrontSize64,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureScalarStores,
|
||||
FeatureScalarAtomics,
|
||||
FeatureScalarFlatScratchInsts,
|
||||
@ -812,7 +812,7 @@ def FeatureISAVersion10_1_2 : FeatureSet<
|
||||
FeatureDot5Insts,
|
||||
FeatureDot6Insts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureWavefrontSize64,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureScalarStores,
|
||||
FeatureScalarAtomics,
|
||||
FeatureScalarFlatScratchInsts,
|
||||
|
@ -50,19 +50,19 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4,
|
||||
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
|
||||
|
||||
def AMDGPUIfOp : SDTypeProfile<1, 2,
|
||||
[SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
|
||||
[SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
|
||||
>;
|
||||
|
||||
def AMDGPUElseOp : SDTypeProfile<1, 2,
|
||||
[SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, OtherVT>]
|
||||
[SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
|
||||
>;
|
||||
|
||||
def AMDGPULoopOp : SDTypeProfile<0, 2,
|
||||
[SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>]
|
||||
[SDTCisVT<0, i1>, SDTCisVT<1, OtherVT>]
|
||||
>;
|
||||
|
||||
def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
|
||||
[SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
|
||||
[SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, i1>]
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -101,6 +101,12 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
|
||||
return addOperand(Inst, MCOperand::createImm(Imm));
|
||||
}
|
||||
|
||||
// Static decoder hook for boolean-register operands.
// Casts the opaque Decoder pointer to the AMDGPUDisassembler, asks it to
// decode Val via AMDGPUDisassembler::decodeBoolReg, and appends the resulting
// operand to Inst through addOperand, whose status is returned.
// Addr is not referenced in this body.
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val,
|
||||
uint64_t Addr, const void *Decoder) {
|
||||
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
|
||||
return addOperand(Inst, DAsm->decodeBoolReg(Val));
|
||||
}
|
||||
|
||||
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
|
||||
static DecodeStatus StaticDecoderName(MCInst &Inst, \
|
||||
unsigned Imm, \
|
||||
|
@ -946,6 +946,15 @@ public:
|
||||
/// not exist. If Opcode is not a pseudo instruction, this is identity.
|
||||
int pseudoToMCOpcode(int Opcode) const;
|
||||
|
||||
/// Returns the register class for operand \p OpNum of the instruction
/// described by \p TID, looked up through RI using the operand's RegClass id.
/// Returns nullptr when \p OpNum is not a valid operand index for \p TID.
/// \p TRI and \p MF are not referenced in this body.
const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
|
||||
const TargetRegisterInfo *TRI,
|
||||
const MachineFunction &MF)
|
||||
const override {
|
||||
// Guard against indexing past the end of the operand info table.
if (OpNum >= TID.getNumOperands())
|
||||
return nullptr;
|
||||
return RI.getRegClass(TID.OpInfo[OpNum].RegClass);
|
||||
}
|
||||
|
||||
void fixImplicitOperands(MachineInstr &MI) const;
|
||||
};
|
||||
|
||||
|
@ -766,6 +766,15 @@ def VOPDstS64orS32 : BoolRC {
|
||||
let PrintMethod = "printVOPDst";
|
||||
}
|
||||
|
||||
// SCSrc_i1 is the operand for pseudo instructions only.
|
||||
// Boolean immediates shall not be exposed to codegen instructions.
|
||||
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
|
||||
let OperandNamespace = "AMDGPU";
|
||||
let OperandType = "OPERAND_REG_IMM_INT32";
|
||||
let ParserMatchClass = BoolReg;
|
||||
let DecoderMethod = "decodeBoolReg";
|
||||
}
|
||||
|
||||
// ===----------------------------------------------------------------------===//
|
||||
// ExpSrc* Special cases for exp src operands which are printed as
|
||||
// "off" depending on en operand.
|
||||
@ -804,11 +813,12 @@ def SDWASrc_i16 : SDWASrc<i16>;
|
||||
def SDWASrc_f32 : SDWASrc<f32>;
|
||||
def SDWASrc_f16 : SDWASrc<f16>;
|
||||
|
||||
def SDWAVopcDst : VOPDstOperand<SReg_64> {
|
||||
def SDWAVopcDst : BoolRC {
|
||||
let OperandNamespace = "AMDGPU";
|
||||
let OperandType = "OPERAND_SDWA_VOPC_DST";
|
||||
let EncoderMethod = "getSDWAVopcDstEncoding";
|
||||
let DecoderMethod = "decodeSDWAVopcDst";
|
||||
let PrintMethod = "printVOPDst";
|
||||
}
|
||||
|
||||
class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
|
||||
@ -940,11 +950,6 @@ def f32kimm : kimmOperand<i32>;
|
||||
def KImmFP16MatchClass : KImmMatchClass<16>;
|
||||
def f16kimm : kimmOperand<i16>;
|
||||
|
||||
|
||||
def VOPDstS64 : VOPDstOperand <SReg_64> {
|
||||
let PrintMethod = "printVOPDst";
|
||||
}
|
||||
|
||||
class FPInputModsMatchClass <int opSize> : AsmOperandClass {
|
||||
let Name = "RegOrImmWithFP"#opSize#"InputMods";
|
||||
let ParserMethod = "parseRegOrImmWithFPInputMods";
|
||||
@ -1237,7 +1242,7 @@ class getVALUDstForVT<ValueType VT> {
|
||||
!if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
|
||||
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
|
||||
!if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
|
||||
VOPDstOperand<SReg_64>)))); // else VT == i1
|
||||
VOPDstS64orS32)))); // else VT == i1
|
||||
}
|
||||
|
||||
// Returns the register class to use for the destination of VOP[12C]
|
||||
@ -1313,7 +1318,7 @@ class getVOP3SrcForVT<ValueType VT> {
|
||||
VSrc_f64,
|
||||
VSrc_b64),
|
||||
!if(!eq(VT.Value, i1.Value),
|
||||
SCSrc_i1,
|
||||
SSrc_i1,
|
||||
!if(isFP,
|
||||
!if(!eq(VT.Value, f16.Value),
|
||||
VSrc_f16,
|
||||
|
@ -121,14 +121,14 @@ def WWM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
|
||||
|
||||
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
|
||||
|
||||
def ENTER_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins i64imm:$src0)> {
|
||||
def ENTER_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> {
|
||||
let Defs = [EXEC];
|
||||
let hasSideEffects = 0;
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
}
|
||||
|
||||
def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins SReg_64:$src0)> {
|
||||
def EXIT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
|
||||
let hasSideEffects = 0;
|
||||
let mayLoad = 0;
|
||||
let mayStore = 0;
|
||||
@ -161,11 +161,11 @@ def S_SUB_U64_PSEUDO : SPseudoInstSI <
|
||||
>;
|
||||
|
||||
def S_ADD_U64_CO_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
|
||||
(outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
|
||||
>;
|
||||
|
||||
def S_SUB_U64_CO_PSEUDO : SPseudoInstSI <
|
||||
(outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
|
||||
(outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
|
||||
>;
|
||||
} // End usesCustomInserter = 1, Defs = [SCC]
|
||||
|
||||
@ -233,30 +233,30 @@ let isTerminator = 1 in {
|
||||
let OtherPredicates = [EnableLateCFGStructurize] in {
|
||||
def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI <
|
||||
(outs),
|
||||
(ins SReg_64:$vcc, brtarget:$target),
|
||||
(ins SReg_1:$vcc, brtarget:$target),
|
||||
[(brcond i1:$vcc, bb:$target)]> {
|
||||
let Size = 12;
|
||||
}
|
||||
}
|
||||
|
||||
def SI_IF: CFPseudoInstSI <
|
||||
(outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
|
||||
[(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
|
||||
(outs SReg_1:$dst), (ins SReg_1:$vcc, brtarget:$target),
|
||||
[(set i1:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
|
||||
let Constraints = "";
|
||||
let Size = 12;
|
||||
let hasSideEffects = 1;
|
||||
}
|
||||
|
||||
def SI_ELSE : CFPseudoInstSI <
|
||||
(outs SReg_64:$dst),
|
||||
(ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
|
||||
(outs SReg_1:$dst),
|
||||
(ins SReg_1:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
|
||||
let Size = 12;
|
||||
let hasSideEffects = 1;
|
||||
}
|
||||
|
||||
def SI_LOOP : CFPseudoInstSI <
|
||||
(outs), (ins SReg_64:$saved, brtarget:$target),
|
||||
[(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
|
||||
(outs), (ins SReg_1:$saved, brtarget:$target),
|
||||
[(AMDGPUloop i1:$saved, bb:$target)], 1, 1> {
|
||||
let Size = 8;
|
||||
let isBranch = 1;
|
||||
let hasSideEffects = 1;
|
||||
@ -265,8 +265,7 @@ def SI_LOOP : CFPseudoInstSI <
|
||||
} // End isTerminator = 1
|
||||
|
||||
def SI_END_CF : CFPseudoInstSI <
|
||||
(outs), (ins SReg_64:$saved),
|
||||
[(int_amdgcn_end_cf i64:$saved)], 1, 1> {
|
||||
(outs), (ins SReg_1:$saved), [], 1, 1> {
|
||||
let Size = 4;
|
||||
let isAsCheapAsAMove = 1;
|
||||
let isReMaterializable = 1;
|
||||
@ -276,8 +275,7 @@ def SI_END_CF : CFPseudoInstSI <
|
||||
}
|
||||
|
||||
def SI_IF_BREAK : CFPseudoInstSI <
|
||||
(outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src),
|
||||
[(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]> {
|
||||
(outs SReg_1:$dst), (ins SReg_1:$vcc, SReg_1:$src), []> {
|
||||
let Size = 4;
|
||||
let isAsCheapAsAMove = 1;
|
||||
let isReMaterializable = 1;
|
||||
@ -303,7 +301,7 @@ multiclass PseudoInstKill <dag ins> {
|
||||
}
|
||||
}
|
||||
|
||||
defm SI_KILL_I1 : PseudoInstKill <(ins SSrc_b64:$src, i1imm:$killvalue)>;
|
||||
defm SI_KILL_I1 : PseudoInstKill <(ins SCSrc_i1:$src, i1imm:$killvalue)>;
|
||||
defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>;
|
||||
|
||||
let Defs = [EXEC,VCC] in
|
||||
@ -322,7 +320,7 @@ def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
|
||||
}
|
||||
|
||||
def SI_PS_LIVE : PseudoInstSI <
|
||||
(outs SReg_64:$dst), (ins),
|
||||
(outs SReg_1:$dst), (ins),
|
||||
[(set i1:$dst, (int_amdgcn_ps_live))]> {
|
||||
let SALU = 1;
|
||||
}
|
||||
@ -584,7 +582,7 @@ def : GCNPat<
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(AMDGPUelse i64:$src, bb:$target),
|
||||
(AMDGPUelse i1:$src, bb:$target),
|
||||
(SI_ELSE $src, $target, 0)
|
||||
>;
|
||||
|
||||
|
@ -733,8 +733,6 @@ def SSrcOrLds_b32 : RegisterOperand<SRegOrLds_32> {
|
||||
|
||||
defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ;
|
||||
|
||||
def SCSrc_i1 : RegisterOperand<SReg_64_XEXEC>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -344,7 +344,7 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp
|
||||
let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
|
||||
let AsmDPP16 = AsmDPP#"$fi";
|
||||
let Outs32 = (outs DstRC:$vdst);
|
||||
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
|
||||
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
|
||||
}
|
||||
|
||||
// Write out to vcc or arbitrary SGPR and read in from vcc or
|
||||
@ -358,7 +358,7 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*
|
||||
let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
|
||||
let AsmDPP16 = AsmDPP#"$fi";
|
||||
let Outs32 = (outs DstRC:$vdst);
|
||||
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
|
||||
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
|
||||
|
||||
// Suppress src2 implied by type since the 32-bit encoding uses an
|
||||
// implicit VCC use.
|
||||
|
@ -183,7 +183,7 @@ class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
|
||||
let HasModifiers = 0;
|
||||
let HasClamp = 0;
|
||||
let HasOMod = 0;
|
||||
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
|
||||
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
|
||||
let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
|
||||
}
|
||||
|
||||
@ -203,7 +203,7 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
|
||||
// FIXME: Hack to stop printing _e64
|
||||
let DstRC = RegisterOperand<VReg_64>;
|
||||
|
||||
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
|
||||
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
|
||||
let Asm64 = " $vdst, $sdst, $src0, $src1, $src2$clamp";
|
||||
}
|
||||
|
||||
|
@ -56,7 +56,7 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt
|
||||
let Asm32 = "$src0, $src1";
|
||||
// The destination for 32-bit encoding is implicit.
|
||||
let HasDst32 = 0;
|
||||
let Outs64 = (outs VOPDstS64:$sdst);
|
||||
let Outs64 = (outs VOPDstS64orS32:$sdst);
|
||||
list<SchedReadWrite> Schedule = sched;
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,7 @@ define amdgpu_ps float @add3(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: add3:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_add3_u32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = add i32 %a, %b
|
||||
%result = add i32 %x, %c
|
||||
@ -46,6 +47,7 @@ define amdgpu_ps float @mad_no_add3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
|
||||
; GFX10-LABEL: mad_no_add3:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v4
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: v_mad_u32_u24 v0, v2, v3, v0
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%a0 = shl i32 %a, 8
|
||||
@ -85,6 +87,7 @@ define amdgpu_ps float @add3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) {
|
||||
; GFX10-LABEL: add3_vgpr_b:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_add3_u32 v0, s3, s2, v0
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = add i32 %a, %b
|
||||
%result = add i32 %x, %c
|
||||
@ -107,6 +110,7 @@ define amdgpu_ps float @add3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: add3_vgpr_all2:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_add3_u32 v0, v1, v2, v0
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = add i32 %b, %c
|
||||
%result = add i32 %a, %x
|
||||
@ -129,6 +133,7 @@ define amdgpu_ps float @add3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: add3_vgpr_bc:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_add3_u32 v0, s2, v0, v1
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = add i32 %a, %b
|
||||
%result = add i32 %x, %c
|
||||
@ -151,6 +156,7 @@ define amdgpu_ps float @add3_vgpr_const(i32 %a, i32 %b) {
|
||||
; GFX10-LABEL: add3_vgpr_const:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_add3_u32 v0, v0, v1, 16
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = add i32 %a, %b
|
||||
%result = add i32 %x, 16
|
||||
@ -175,6 +181,7 @@ define amdgpu_ps <2 x float> @add3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x
|
||||
; GFX10-LABEL: add3_multiuse_outer:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_add3_u32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: v_mul_lo_u32 v1, v0, v3
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%inner = add i32 %a, %b
|
||||
@ -202,6 +209,7 @@ define amdgpu_ps <2 x float> @add3_multiuse_inner(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: add3_multiuse_inner:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: v_add_nc_u32_e32 v1, v0, v2
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%inner = add i32 %a, %b
|
||||
@ -240,6 +248,7 @@ define amdgpu_ps float @add3_uniform_vgpr(float inreg %a, float inreg %b, float
|
||||
; GFX10-NEXT: v_add_f32_e64 v1, s3, 2.0
|
||||
; GFX10-NEXT: v_add_f32_e64 v2, s2, 1.0
|
||||
; GFX10-NEXT: v_add_f32_e64 v0, 0x40400000, s4
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1
|
||||
; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v0
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
|
@ -1,8 +1,10 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}add_var_var_i1:
|
||||
; GCN: s_xor_b64
|
||||
; GFX9: s_xor_b64
|
||||
; GFX10: s_xor_b32
|
||||
define amdgpu_kernel void @add_var_var_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) {
|
||||
%a = load volatile i1, i1 addrspace(1)* %in0
|
||||
%b = load volatile i1, i1 addrspace(1)* %in1
|
||||
@ -12,7 +14,8 @@ define amdgpu_kernel void @add_var_var_i1(i1 addrspace(1)* %out, i1 addrspace(1)
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}add_var_imm_i1:
|
||||
; GCN: s_not_b64
|
||||
; GFX9: s_not_b64
|
||||
; GFX10: s_not_b32
|
||||
define amdgpu_kernel void @add_var_imm_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) {
|
||||
%a = load volatile i1, i1 addrspace(1)* %in
|
||||
%add = add i1 %a, 1
|
||||
@ -22,7 +25,8 @@ define amdgpu_kernel void @add_var_imm_i1(i1 addrspace(1)* %out, i1 addrspace(1)
|
||||
|
||||
; GCN-LABEL: {{^}}add_i1_cf:
|
||||
; GCN: ; %endif
|
||||
; GCN: s_not_b64
|
||||
; GFX9: s_not_b64
|
||||
; GFX10: s_not_b32
|
||||
define amdgpu_kernel void @add_i1_cf(i1 addrspace(1)* %out, i1 addrspace(1)* %a, i1 addrspace(1)* %b) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -22,6 +22,7 @@ define amdgpu_ps float @add_shl(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: add_shl:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_add_lshl_u32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = add i32 %a, %b
|
||||
%result = shl i32 %x, %c
|
||||
@ -45,6 +46,7 @@ define amdgpu_ps float @add_shl_vgpr_c(i32 inreg %a, i32 inreg %b, i32 %c) {
|
||||
; GFX10-LABEL: add_shl_vgpr_c:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_add_lshl_u32 v0, s2, s3, v0
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = add i32 %a, %b
|
||||
%result = shl i32 %x, %c
|
||||
@ -67,6 +69,7 @@ define amdgpu_ps float @add_shl_vgpr_ac(i32 %a, i32 inreg %b, i32 %c) {
|
||||
; GFX10-LABEL: add_shl_vgpr_ac:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_add_lshl_u32 v0, v0, s2, v1
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = add i32 %a, %b
|
||||
%result = shl i32 %x, %c
|
||||
@ -89,6 +92,7 @@ define amdgpu_ps float @add_shl_vgpr_const(i32 %a, i32 %b) {
|
||||
; GFX10-LABEL: add_shl_vgpr_const:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_add_lshl_u32 v0, v0, v1, 9
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = add i32 %a, %b
|
||||
%result = shl i32 %x, 9
|
||||
@ -112,6 +116,7 @@ define amdgpu_ps float @add_shl_vgpr_const_inline_const(i32 %a) {
|
||||
; GFX10-LABEL: add_shl_vgpr_const_inline_const:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 9, 0x7e800
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = add i32 %a, 1012
|
||||
%result = shl i32 %x, 9
|
||||
@ -138,6 +143,7 @@ define amdgpu_ps float @add_shl_vgpr_inline_const_x2(i32 %a) {
|
||||
; GFX10-LABEL: add_shl_vgpr_inline_const_x2:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 9, 0x600
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = add i32 %a, 3
|
||||
%result = shl i32 %x, 9
|
||||
|
@ -22,6 +22,7 @@ define amdgpu_ps float @and_or(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: and_or:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_and_or_b32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = and i32 %a, %b
|
||||
%result = or i32 %x, %c
|
||||
@ -46,6 +47,7 @@ define amdgpu_ps float @and_or_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) {
|
||||
; GFX10-LABEL: and_or_vgpr_b:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_and_or_b32 v0, s2, v0, s3
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = and i32 %a, %b
|
||||
%result = or i32 %x, %c
|
||||
@ -68,6 +70,7 @@ define amdgpu_ps float @and_or_vgpr_ab(i32 %a, i32 %b, i32 inreg %c) {
|
||||
; GFX10-LABEL: and_or_vgpr_ab:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_and_or_b32 v0, v0, v1, s2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = and i32 %a, %b
|
||||
%result = or i32 %x, %c
|
||||
@ -90,6 +93,7 @@ define amdgpu_ps float @and_or_vgpr_const(i32 %a, i32 %b) {
|
||||
; GFX10-LABEL: and_or_vgpr_const:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_and_or_b32 v0, v0, 4, v1
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = and i32 4, %a
|
||||
%result = or i32 %x, %b
|
||||
@ -113,6 +117,7 @@ define amdgpu_ps float @and_or_vgpr_const_inline_const(i32 %a) {
|
||||
; GFX10-LABEL: and_or_vgpr_const_inline_const:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_and_or_b32 v0, v0, 20, 0x808
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = and i32 20, %a
|
||||
%result = or i32 %x, 2056
|
||||
@ -135,6 +140,7 @@ define amdgpu_ps float @and_or_vgpr_inline_const_x2(i32 %a) {
|
||||
; GFX10-LABEL: and_or_vgpr_inline_const_x2:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_and_or_b32 v0, v0, 4, 1
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = and i32 4, %a
|
||||
%result = or i32 %x, 1
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=WAVE64 --check-prefix=NOTES %s
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=WAVE64 --check-prefix=NOTES %s
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=WAVE64 --check-prefix=NOTES %s
|
||||
; run: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX1010 --check-prefix=WAVE32 --check-prefix=NOTES %s
|
||||
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX1010 --check-prefix=WAVE32 --check-prefix=NOTES %s
|
||||
|
||||
@var = addrspace(1) global float 0.0
|
||||
|
||||
|
@ -1,9 +1,23 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,WAVE64 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,WAVE32 %s
|
||||
|
||||
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo14:
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
|
||||
; GCN: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x3ffc, [[FI]]
|
||||
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
|
||||
define amdgpu_kernel void @scratch_buffer_known_high_masklo14() #0 {
|
||||
%alloca = alloca i32, align 4, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
%toint = ptrtoint i32 addrspace(5)* %alloca to i32
|
||||
%masked = and i32 %toint, 16383
|
||||
store volatile i32 %masked, i32 addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo16:
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
|
||||
; GCN: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xfffc, [[FI]]
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
|
||||
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
|
||||
define amdgpu_kernel void @scratch_buffer_known_high_masklo16() #0 {
|
||||
%alloca = alloca i32, align 4, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
@ -15,8 +29,11 @@ define amdgpu_kernel void @scratch_buffer_known_high_masklo16() #0 {
|
||||
|
||||
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo17:
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
|
||||
; GCN-NOT: [[FI]]
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
|
||||
; WAVE64-NOT: [[FI]]
|
||||
; WAVE64: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
|
||||
|
||||
; WAVE32: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x1fffc, [[FI]]
|
||||
; WAVE32: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
|
||||
define amdgpu_kernel void @scratch_buffer_known_high_masklo17() #0 {
|
||||
%alloca = alloca i32, align 4, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
@ -29,7 +46,7 @@ define amdgpu_kernel void @scratch_buffer_known_high_masklo17() #0 {
|
||||
; GCN-LABEL: {{^}}scratch_buffer_known_high_mask18:
|
||||
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
|
||||
; GCN-NOT: [[FI]]
|
||||
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
|
||||
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
|
||||
define amdgpu_kernel void @scratch_buffer_known_high_mask18() #0 {
|
||||
%alloca = alloca i32, align 4, addrspace(5)
|
||||
store volatile i32 0, i32 addrspace(5)* %alloca
|
||||
|
@ -1,4 +1,5 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-insert-skips -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass si-insert-skips -verify-machineinstrs -o - %s | FileCheck -check-prefix=W32 %s
|
||||
|
||||
---
|
||||
# GCN-LABEL: name: and_execz_mov_vccz
|
||||
@ -318,3 +319,22 @@ body: |
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM 0, implicit $scc
|
||||
...
|
||||
---
|
||||
# W32-LABEL: name: and_execz_mov_vccz_w32
|
||||
# W32-NOT: S_MOV_
|
||||
# W32-NOT: S_AND_
|
||||
# W32: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
name: and_execz_mov_vccz_w32
|
||||
body: |
|
||||
bb.0:
|
||||
S_NOP 0
|
||||
|
||||
bb.1:
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$sgpr0 = S_MOV_B32 -1
|
||||
$vcc_lo = S_AND_B32 $exec_lo, killed $sgpr0, implicit-def dead $scc
|
||||
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefix=SI --check-prefix=ALL %s
|
||||
; RUN: opt -S -mcpu=tonga -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefix=CI --check-prefix=ALL %s
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=SI,SICI,ALL %s
|
||||
; RUN: opt -S -mcpu=tonga -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=CI,SICI,ALL %s
|
||||
; RUN: opt -S -mcpu=gfx1010 -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=GFX10,ALL %s
|
||||
|
||||
; SI-NOT: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] undef, align 4
|
||||
; CI: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] undef, align 4
|
||||
@ -46,7 +47,8 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; ALL: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1600 x [5 x i32]] undef, align 4
|
||||
; SICI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1600 x [5 x i32]] undef, align 4
|
||||
; GFX10: alloca [5 x i32]
|
||||
|
||||
define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 {
|
||||
entry:
|
||||
@ -141,7 +143,9 @@ entry:
|
||||
}
|
||||
|
||||
; ALL-LABEL: @occupancy_6_over(
|
||||
; ALL: alloca [43 x i8]
|
||||
; SICI: alloca [43 x i8]
|
||||
; GFX10-NOT: alloca
|
||||
|
||||
define amdgpu_kernel void @occupancy_6_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 {
|
||||
entry:
|
||||
%stack = alloca [43 x i8], align 4
|
||||
@ -191,7 +195,9 @@ entry:
|
||||
}
|
||||
|
||||
; ALL-LABEL: @occupancy_8_over(
|
||||
; ALL: alloca [33 x i8]
|
||||
; SICI: alloca [33 x i8]
|
||||
; GFX10-NOT: alloca
|
||||
|
||||
define amdgpu_kernel void @occupancy_8_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 {
|
||||
entry:
|
||||
%stack = alloca [33 x i8], align 4
|
||||
@ -241,7 +247,9 @@ entry:
|
||||
}
|
||||
|
||||
; ALL-LABEL: @occupancy_9_over(
|
||||
; ALL: alloca [29 x i8]
|
||||
; SICI: alloca [29 x i8]
|
||||
; GFX10-NOT: alloca
|
||||
|
||||
define amdgpu_kernel void @occupancy_9_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 {
|
||||
entry:
|
||||
%stack = alloca [29 x i8], align 4
|
||||
|
@ -1,6 +1,7 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx700 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,ADDR64
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,W64-NO-ADDR64
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,W64-NO-ADDR64
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W32
|
||||
|
||||
# Test that we correctly legalize VGPR Rsrc operands in MUBUF instructions.
|
||||
#
|
||||
|
@ -22,6 +22,7 @@ define amdgpu_ps float @or3(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: or3:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = or i32 %a, %b
|
||||
%result = or i32 %x, %c
|
||||
@ -47,6 +48,7 @@ define amdgpu_ps float @or3_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) {
|
||||
; GFX10-LABEL: or3_vgpr_a:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_or3_b32 v0, v0, s2, s3
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = or i32 %a, %b
|
||||
%result = or i32 %x, %c
|
||||
@ -69,6 +71,7 @@ define amdgpu_ps float @or3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: or3_vgpr_all2:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_or3_b32 v0, v1, v2, v0
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = or i32 %b, %c
|
||||
%result = or i32 %a, %x
|
||||
@ -91,6 +94,7 @@ define amdgpu_ps float @or3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: or3_vgpr_bc:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_or3_b32 v0, s2, v0, v1
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = or i32 %a, %b
|
||||
%result = or i32 %x, %c
|
||||
@ -113,6 +117,7 @@ define amdgpu_ps float @or3_vgpr_const(i32 %a, i32 %b) {
|
||||
; GFX10-LABEL: or3_vgpr_const:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_or3_b32 v0, v1, v0, 64
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = or i32 64, %b
|
||||
%result = or i32 %x, %a
|
||||
|
@ -49,6 +49,24 @@ body: |
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: s11_vs_vcc{{$}}
|
||||
# GCN: $vgpr0, $vcc_lo = V_ADDC_U32_e64 killed $sgpr14, killed $vgpr0, killed $vcc_lo, 0
|
||||
---
|
||||
name: s11_vs_vcc
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: sgpr_32, preferred-register: '$sgpr11' }
|
||||
- { id: 1, class: vgpr_32 }
|
||||
- { id: 2, class: vgpr_32 }
|
||||
body: |
|
||||
bb.0:
|
||||
%0 = IMPLICIT_DEF
|
||||
%1 = IMPLICIT_DEF
|
||||
$vcc_lo = IMPLICIT_DEF
|
||||
%2, $vcc_lo = V_ADDC_U32_e64 killed %0, killed %1, killed $vcc_lo, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: s0_vs_s16{{$}}
|
||||
# GCN: S_AND_B32 killed renamable $sgpr14, $sgpr0,
|
||||
---
|
||||
|
@ -22,6 +22,7 @@ define amdgpu_ps float @shl_add(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: shl_add:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_add_u32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, %b
|
||||
%result = add i32 %x, %c
|
||||
@ -46,6 +47,7 @@ define amdgpu_ps float @shl_add_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) {
|
||||
; GFX10-LABEL: shl_add_vgpr_a:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_add_u32 v0, v0, s2, s3
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, %b
|
||||
%result = add i32 %x, %c
|
||||
@ -68,6 +70,7 @@ define amdgpu_ps float @shl_add_vgpr_all(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: shl_add_vgpr_all:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_add_u32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, %b
|
||||
%result = add i32 %x, %c
|
||||
@ -90,6 +93,7 @@ define amdgpu_ps float @shl_add_vgpr_ab(i32 %a, i32 %b, i32 inreg %c) {
|
||||
; GFX10-LABEL: shl_add_vgpr_ab:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_add_u32 v0, v0, v1, s2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, %b
|
||||
%result = add i32 %x, %c
|
||||
@ -112,6 +116,7 @@ define amdgpu_ps float @shl_add_vgpr_const(i32 %a, i32 %b) {
|
||||
; GFX10-LABEL: shl_add_vgpr_const:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 3, v1
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, 3
|
||||
%result = add i32 %x, %b
|
||||
|
@ -22,6 +22,7 @@ define amdgpu_ps float @shl_or(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: shl_or:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_or_b32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, %b
|
||||
%result = or i32 %x, %c
|
||||
@ -45,6 +46,7 @@ define amdgpu_ps float @shl_or_vgpr_c(i32 inreg %a, i32 inreg %b, i32 %c) {
|
||||
; GFX10-LABEL: shl_or_vgpr_c:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_or_b32 v0, s2, s3, v0
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, %b
|
||||
%result = or i32 %x, %c
|
||||
@ -67,6 +69,7 @@ define amdgpu_ps float @shl_or_vgpr_all2(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: shl_or_vgpr_all2:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_or_b32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, %b
|
||||
%result = or i32 %c, %x
|
||||
@ -89,6 +92,7 @@ define amdgpu_ps float @shl_or_vgpr_ac(i32 %a, i32 inreg %b, i32 %c) {
|
||||
; GFX10-LABEL: shl_or_vgpr_ac:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_or_b32 v0, v0, s2, v1
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, %b
|
||||
%result = or i32 %x, %c
|
||||
@ -111,6 +115,7 @@ define amdgpu_ps float @shl_or_vgpr_const(i32 %a, i32 %b) {
|
||||
; GFX10-LABEL: shl_or_vgpr_const:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_or_b32 v0, v0, v1, 6
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, %b
|
||||
%result = or i32 %x, 6
|
||||
@ -133,6 +138,7 @@ define amdgpu_ps float @shl_or_vgpr_const2(i32 %a, i32 %b) {
|
||||
; GFX10-LABEL: shl_or_vgpr_const2:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_or_b32 v0, v0, 6, v1
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, 6
|
||||
%result = or i32 %x, %b
|
||||
@ -155,6 +161,7 @@ define amdgpu_ps float @shl_or_vgpr_const_scalar1(i32 inreg %a, i32 %b) {
|
||||
; GFX10-LABEL: shl_or_vgpr_const_scalar1:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_or_b32 v0, s2, 6, v0
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, 6
|
||||
%result = or i32 %x, %b
|
||||
@ -177,6 +184,7 @@ define amdgpu_ps float @shl_or_vgpr_const_scalar2(i32 %a, i32 inreg %b) {
|
||||
; GFX10-LABEL: shl_or_vgpr_const_scalar2:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_lshl_or_b32 v0, v0, 6, s2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = shl i32 %a, 6
|
||||
%result = or i32 %x, %b
|
||||
|
@ -92,6 +92,7 @@ entry:
|
||||
; GCN-DAG: s_mov_b32 s1, 1
|
||||
; GCN-DAG: s_mov_b32 s0, 0
|
||||
; SI-NEXT: nop 3
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GCN-NEXT: s_buffer_load_dword s0, s[0:3], 0x0
|
||||
define amdgpu_ps float @smrd_hazard(<4 x i32> inreg %desc) #0 {
|
||||
main_body:
|
||||
|
@ -1,8 +1,10 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE64 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE32 %s
|
||||
|
||||
|
||||
; GCN-LABEL: {{^}}sub_var_var_i1:
|
||||
; GCN: s_xor_b64
|
||||
; WAVE32: s_xor_b32
|
||||
; WAVE64: s_xor_b64
|
||||
define amdgpu_kernel void @sub_var_var_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) {
|
||||
%a = load volatile i1, i1 addrspace(1)* %in0
|
||||
%b = load volatile i1, i1 addrspace(1)* %in1
|
||||
@ -12,7 +14,8 @@ define amdgpu_kernel void @sub_var_var_i1(i1 addrspace(1)* %out, i1 addrspace(1)
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}sub_var_imm_i1:
|
||||
; GCN: s_not_b64
|
||||
; WAVE32: s_not_b32
|
||||
; WAVE64: s_not_b64
|
||||
define amdgpu_kernel void @sub_var_imm_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) {
|
||||
%a = load volatile i1, i1 addrspace(1)* %in
|
||||
%sub = sub i1 %a, 1
|
||||
@ -22,7 +25,8 @@ define amdgpu_kernel void @sub_var_imm_i1(i1 addrspace(1)* %out, i1 addrspace(1)
|
||||
|
||||
; GCN-LABEL: {{^}}sub_i1_cf:
|
||||
; GCN: ; %endif
|
||||
; GCN: s_not_b64
|
||||
; WAVE32: s_not_b32
|
||||
; WAVE64: s_not_b64
|
||||
define amdgpu_kernel void @sub_i1_cf(i1 addrspace(1)* %out, i1 addrspace(1)* %a, i1 addrspace(1)* %b) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
1140
test/CodeGen/AMDGPU/wave32.ll
Normal file
1140
test/CodeGen/AMDGPU/wave32.ll
Normal file
File diff suppressed because it is too large
Load Diff
@ -16,6 +16,7 @@ define amdgpu_ps float @xor3(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: xor3:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_xor3_b32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = xor i32 %a, %b
|
||||
%result = xor i32 %x, %c
|
||||
@ -33,6 +34,7 @@ define amdgpu_ps float @xor3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) {
|
||||
; GFX10-LABEL: xor3_vgpr_b:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_xor3_b32 v0, s2, v0, s3
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = xor i32 %a, %b
|
||||
%result = xor i32 %x, %c
|
||||
@ -50,6 +52,7 @@ define amdgpu_ps float @xor3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: xor3_vgpr_all2:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_xor3_b32 v0, v1, v2, v0
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = xor i32 %b, %c
|
||||
%result = xor i32 %a, %x
|
||||
@ -67,6 +70,7 @@ define amdgpu_ps float @xor3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: xor3_vgpr_bc:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_xor3_b32 v0, s2, v0, v1
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = xor i32 %a, %b
|
||||
%result = xor i32 %x, %c
|
||||
@ -84,6 +88,7 @@ define amdgpu_ps float @xor3_vgpr_const(i32 %a, i32 %b) {
|
||||
; GFX10-LABEL: xor3_vgpr_const:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_xor3_b32 v0, v0, v1, 16
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = xor i32 %a, %b
|
||||
%result = xor i32 %x, 16
|
||||
@ -102,6 +107,7 @@ define amdgpu_ps <2 x float> @xor3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x
|
||||
; GFX10-LABEL: xor3_multiuse_outer:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_xor3_b32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: v_mul_lo_u32 v1, v0, v3
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%inner = xor i32 %a, %b
|
||||
@ -123,6 +129,7 @@ define amdgpu_ps <2 x float> @xor3_multiuse_inner(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: xor3_multiuse_inner:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: v_xor_b32_e32 v1, v0, v2
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%inner = xor i32 %a, %b
|
||||
@ -151,6 +158,7 @@ define amdgpu_ps float @xor3_uniform_vgpr(float inreg %a, float inreg %b, float
|
||||
; GFX10-NEXT: v_add_f32_e64 v1, s3, 2.0
|
||||
; GFX10-NEXT: v_add_f32_e64 v2, s2, 1.0
|
||||
; GFX10-NEXT: v_add_f32_e64 v0, 0x40400000, s4
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: v_xor_b32_e32 v1, v2, v1
|
||||
; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
|
@ -22,6 +22,7 @@ define amdgpu_ps float @xor_add(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: xor_add:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_xad_u32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = xor i32 %a, %b
|
||||
%result = add i32 %x, %c
|
||||
@ -46,6 +47,7 @@ define amdgpu_ps float @xor_add_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) {
|
||||
; GFX10-LABEL: xor_add_vgpr_a:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_xad_u32 v0, v0, s2, s3
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = xor i32 %a, %b
|
||||
%result = add i32 %x, %c
|
||||
@ -68,6 +70,7 @@ define amdgpu_ps float @xor_add_vgpr_all(i32 %a, i32 %b, i32 %c) {
|
||||
; GFX10-LABEL: xor_add_vgpr_all:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_xad_u32 v0, v0, v1, v2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = xor i32 %a, %b
|
||||
%result = add i32 %x, %c
|
||||
@ -90,6 +93,7 @@ define amdgpu_ps float @xor_add_vgpr_ab(i32 %a, i32 %b, i32 inreg %c) {
|
||||
; GFX10-LABEL: xor_add_vgpr_ab:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_xad_u32 v0, v0, v1, s2
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = xor i32 %a, %b
|
||||
%result = add i32 %x, %c
|
||||
@ -112,6 +116,7 @@ define amdgpu_ps float @xor_add_vgpr_const(i32 %a, i32 %b) {
|
||||
; GFX10-LABEL: xor_add_vgpr_const:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: v_xad_u32 v0, v0, 3, v1
|
||||
; GFX10-NEXT: ; implicit-def: $vcc_hi
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%x = xor i32 %a, 3
|
||||
%result = add i32 %x, %b
|
||||
|
@ -33,3 +33,13 @@ v_div_fmas_f64 v[5:6], v[1:2], s[2:3], v[3:4]
|
||||
|
||||
v_div_fmas_f64 v[5:6], v[1:2], s[2:3], 0x123456
|
||||
// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
|
||||
|
||||
//-----------------------------------------------------------------------------------------
|
||||
// v_mad_u64_u32 has operands of different sizes.
|
||||
// When these operands are literals, they are counted as 2 scalar values even if literals are identical.
|
||||
|
||||
v_mad_u64_u32 v[5:6], s12, v1, 0x12345678, 0x12345678
|
||||
// GFX10: v_mad_u64_u32 v[5:6], s12, v1, 0x12345678, 0x12345678 ; encoding: [0x05,0x0c,0x76,0xd5,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
|
||||
|
||||
v_mad_u64_u32 v[5:6], s12, s1, 0x12345678, 0x12345678
|
||||
// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
|
||||
|
412
test/MC/AMDGPU/wave32.s
Normal file
412
test/MC/AMDGPU/wave32.s
Normal file
@ -0,0 +1,412 @@
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck -check-prefix=GFX1032 %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck -check-prefix=GFX1064 %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX1032-ERR %s
|
||||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX1064-ERR %s
|
||||
|
||||
v_cmp_ge_i32_e32 s0, v0
|
||||
// GFX1032: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]
|
||||
// GFX1064: v_cmp_ge_i32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]
|
||||
|
||||
v_cmp_ge_i32_e32 vcc_lo, s0, v1
|
||||
// GFX1032: v_cmp_ge_i32_e32 vcc_lo, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_cmp_ge_i32_e32 vcc, s0, v2
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_cmp_ge_i32_e32 vcc, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d]
|
||||
|
||||
v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD
|
||||
// GFX1032: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
|
||||
|
||||
v_cmp_class_f32_e32 vcc_lo, s0, v0
|
||||
// GFX1032: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_cmp_class_f32_e32 vcc, s0, v0
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_cmp_class_f32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
|
||||
|
||||
// TODO-GFX10: The following encoding does not match SP3's encoding, which is:
|
||||
// [0xf9,0x04,0x1e,0x7d,0x01,0x06,0x06,0x06]
|
||||
v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD
|
||||
// GFX1032: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
// TODO-GFX10: The following encoding does not match SP3's encoding, which is:
|
||||
// [0xf9,0x04,0x1e,0x7d,0x01,0x06,0x06,0x06]
|
||||
v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
|
||||
|
||||
v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD
|
||||
// GFX1032: v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06]
|
||||
|
||||
v_cndmask_b32_e32 v1, v2, v3,
|
||||
// GFX1032: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
|
||||
// GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02]
|
||||
|
||||
v_cndmask_b32_e32 v1, v2, v3, vcc_lo
|
||||
// GFX1032: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_cndmask_b32_e32 v1, v2, v3, vcc
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02]
|
||||
|
||||
v_add_co_u32_e32 v2, vcc_lo, s0, v2
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_add_co_u32_e32 v2, vcc, s0, v2
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
|
||||
// GFX1032: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
|
||||
|
||||
v_add_co_ci_u32_e32 v3, v3, v4
|
||||
// GFX1032: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
|
||||
// GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
|
||||
|
||||
v_sub_co_u32_e32 v2, vcc_lo, s0, v2
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_sub_co_u32_e32 v2, vcc, s0, v2
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_subrev_co_u32_e32 v2, vcc_lo, s0, v2
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_subrev_co_u32_e32 v2, vcc, s0, v2
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
|
||||
// GFX1032: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
|
||||
|
||||
v_sub_co_ci_u32_e32 v3, v3, v4
|
||||
// GFX1032: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
|
||||
// GFX1064: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
|
||||
|
||||
v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
// GFX1032: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
|
||||
|
||||
v_subrev_co_ci_u32_e32 v1, 0, v1
|
||||
// GFX1032: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
|
||||
// GFX1064: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
|
||||
|
||||
v_add_co_u32_sdwa v0, vcc_lo, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||
// GFX1032-ERR: error: invalid operand
|
||||
// GFX1064-ERR: error: invalid operand
|
||||
|
||||
v_add_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||
// GFX1032-ERR: error: instruction not supported
|
||||
// GFX1064-ERR: error: instruction not supported
|
||||
|
||||
v_add_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||
// GFX1032-ERR: error: not a valid operand
|
||||
// GFX1064-ERR: error: not a valid operand
|
||||
|
||||
v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
|
||||
|
||||
v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
|
||||
// GFX1064: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
|
||||
|
||||
v_sub_co_u32_sdwa v0, vcc_lo, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||
// GFX1032-ERR: error: invalid operand
|
||||
// GFX1064-ERR: error: invalid operand
|
||||
|
||||
v_sub_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||
// GFX1032-ERR: error: instruction not supported
|
||||
// GFX1064-ERR: error: instruction not supported
|
||||
|
||||
v_sub_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||
// GFX1032-ERR: error: not a valid operand
|
||||
// GFX1064-ERR: error: not a valid operand
|
||||
|
||||
v_subrev_co_u32_sdwa v0, vcc_lo, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||
// GFX1032-ERR: error: invalid operand
|
||||
// GFX1064-ERR: error: invalid operand
|
||||
|
||||
v_subrev_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||
// GFX1032-ERR: error: instruction not supported
|
||||
// GFX1064-ERR: error: instruction not supported
|
||||
|
||||
v_subrev_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
|
||||
// GFX1032-ERR: error: not a valid operand
|
||||
// GFX1064-ERR: error: not a valid operand
|
||||
|
||||
v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// GFX1032: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
|
||||
|
||||
v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// GFX1032: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
|
||||
// GFX1064: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
|
||||
|
||||
v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// GFX1032: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
|
||||
|
||||
v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// GFX1032: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
|
||||
// GFX1064: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
|
||||
|
||||
v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
|
||||
// GFX1064: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
|
||||
|
||||
v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
|
||||
|
||||
v_add_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032-ERR: error: not a valid operand
|
||||
// GFX1064-ERR: error: not a valid operand
|
||||
|
||||
v_add_co_u32_dpp v5, vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032-ERR: error: not a valid operand
|
||||
// GFX1064-ERR: error: not a valid operand
|
||||
|
||||
v_add_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032-ERR: error: not a valid operand
|
||||
// GFX1064-ERR: error: not a valid operand
|
||||
|
||||
v_add_co_ci_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
|
||||
// GFX1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
|
||||
|
||||
v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
|
||||
|
||||
v_sub_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032-ERR: error: not a valid operand
|
||||
// GFX1064-ERR: error: not a valid operand
|
||||
|
||||
v_sub_co_u32_dpp v5, vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032-ERR: error: not a valid operand
|
||||
// GFX1064-ERR: error: not a valid operand
|
||||
|
||||
v_sub_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032-ERR: error: not a valid operand
|
||||
// GFX1064-ERR: error: not a valid operand
|
||||
|
||||
v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00]
|
||||
|
||||
v_subrev_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032-ERR: error: not a valid operand
|
||||
// GFX1064-ERR: error: not a valid operand
|
||||
|
||||
v_subrev_co_u32_dpp v5, vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032-ERR: error: not a valid operand
|
||||
// GFX1064-ERR: error: not a valid operand
|
||||
|
||||
v_subrev_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032-ERR: error: not a valid operand
|
||||
// GFX1064-ERR: error: not a valid operand
|
||||
|
||||
v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00]
|
||||
// GFX1064-ERR: error: instruction not supported on this GPU
|
||||
|
||||
v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
// GFX1032-ERR: error: instruction not supported on this GPU
|
||||
// GFX1064: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00]
|
||||
|
||||
v_add_co_u32 v0, s0, v0, v2
|
||||
// GFX1032: v_add_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_add_co_u32_e64 v0, s0, v0, v2
|
||||
// GFX1032: v_add_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_add_co_ci_u32_e64 v4, s0, v1, v5, s2
|
||||
// GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_sub_co_u32 v0, s0, v0, v2
|
||||
// GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_sub_co_u32_e64 v0, s0, v0, v2
|
||||
// GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2
|
||||
// GFX1032: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_subrev_co_u32 v0, s0, v0, v2
|
||||
// GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_subrev_co_u32_e64 v0, s0, v0, v2
|
||||
// GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2
|
||||
// GFX1032: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_add_co_u32 v0, s[0:1], v0, v2
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
|
||||
|
||||
v_add_co_u32_e64 v0, s[0:1], v0, v2
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
|
||||
|
||||
v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
|
||||
|
||||
v_sub_co_u32 v0, s[0:1], v0, v2
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
|
||||
|
||||
v_sub_co_u32_e64 v0, s[0:1], v0, v2
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
|
||||
|
||||
v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
|
||||
|
||||
v_subrev_co_u32 v0, s[0:1], v0, v2
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
|
||||
|
||||
v_subrev_co_u32_e64 v0, s[0:1], v0, v2
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
|
||||
|
||||
v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00]
|
||||
|
||||
v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2
|
||||
// GFX1032: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2 ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3]
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]
|
||||
|
||||
v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo
|
||||
// GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01]
|
||||
|
||||
v_div_scale_f32 v2, s2, v0, v0, v2
|
||||
// GFX1032: v_div_scale_f32 v2, s2, v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_div_scale_f32 v2, s[2:3], v0, v0, v2
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_div_scale_f32 v2, s[2:3], v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
|
||||
|
||||
v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3]
|
||||
// GFX1032: v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3] ; encoding: [0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3]
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3] ; encoding: [0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04]
|
||||
|
||||
v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3]
|
||||
// GFX1032: v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3] ; encoding: [0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3]
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3] ; encoding: [0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04]
|
||||
|
||||
v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3]
|
||||
// GFX1032: v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3] ; encoding: [0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04]
|
||||
// GFX1064-ERR: error: invalid operand for instruction
|
||||
|
||||
v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3]
|
||||
// GFX1032-ERR: error: invalid operand for instruction
|
||||
// GFX1064: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3] ; encoding: [0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04]
|
||||
|
||||
v_cmpx_neq_f32_e32 v0, v1
|
||||
// GFX1032: v_cmpx_neq_f32_e32 v0, v1 ; encoding: [0x00,0x03,0x3a,0x7c]
|
||||
// GFX1064: v_cmpx_neq_f32_e32 v0, v1 ; encoding: [0x00,0x03,0x3a,0x7c]
|
||||
|
||||
v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
|
||||
// GFX1032: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x3a,0x7c,0x00,0x00,0x05,0x06]
|
||||
// GFX1064: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x3a,0x7c,0x00,0x00,0x05,0x06]
|
||||
|
||||
v_cmpx_eq_u32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
|
||||
// GFX1032: v_cmpx_eq_u32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0xa5,0x7d,0x00,0x00,0x05,0x86]
|
||||
// GFX1064: v_cmpx_eq_u32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0xa5,0x7d,0x00,0x00,0x05,0x86]
|
||||
|
||||
v_cmpx_class_f32_e64 v0, 1
|
||||
// GFX1032: v_cmpx_class_f32_e64 v0, 1 ; encoding: [0x00,0x00,0x98,0xd4,0x00,0x03,0x01,0x00]
|
||||
// GFX1064: v_cmpx_class_f32_e64 v0, 1 ; encoding: [0x00,0x00,0x98,0xd4,0x00,0x03,0x01,0x00]
|
||||
|
||||
v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
|
||||
// GFX1032: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x31,0x7d,0x00,0x00,0x05,0x86]
|
||||
// GFX1064: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x31,0x7d,0x00,0x00,0x05,0x86]
|
@ -8,3 +8,9 @@
|
||||
|
||||
# GFX10: s_mov_b32 s105, s104 ; encoding: [0x68,0x03,0xe9,0xbe]
|
||||
0x68,0x03,0xe9,0xbe
|
||||
|
||||
# GFX10: v_cmp_eq_f32_e64 s105, v0, s105
|
||||
0x69,0x00,0x02,0xd4,0x00,0xd3,0x00,0x00
|
||||
|
||||
# GFX10: v_cmp_eq_f32_sdwa s105, v0, s105 src0_sel:DWORD src1_sel:DWORD
|
||||
0xf9,0xd2,0x04,0x7c,0x00,0xe9,0x06,0x86
|
||||
|
164
test/MC/Disassembler/AMDGPU/wave32.txt
Normal file
164
test/MC/Disassembler/AMDGPU/wave32.txt
Normal file
@ -0,0 +1,164 @@
|
||||
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1032 %s
|
||||
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64,-wavefrontsize32 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1064 %s
|
||||
|
||||
# GFX1032: v_cmp_lt_f32_e32 vcc_lo, s2, v4
|
||||
# GFX1064: v_cmp_lt_f32_e32 vcc, s2, v4
|
||||
0x02,0x08,0x02,0x7c
|
||||
|
||||
# GFX1032: v_cmp_ge_i32_e64 s2, s0, v2
|
||||
# GFX1064: v_cmp_ge_i32_e64 s[2:3], s0, v2
|
||||
0x02,0x00,0x86,0xd4,0x00,0x04,0x02,0x00
|
||||
|
||||
# GFX1032: v_cmp_ge_i32_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:DWORD
|
||||
# GFX1064: v_cmp_ge_i32_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:DWORD
|
||||
0xf9,0x04,0x0c,0x7d,0x00,0x00,0x05,0x06
|
||||
|
||||
# GFX1032: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD
|
||||
# GFX1064: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD
|
||||
0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06
|
||||
|
||||
# GFX1032: v_cmp_class_f32_e32 vcc_lo, s0, v0
|
||||
# GFX1064: v_cmp_class_f32_e32 vcc, s0, v0
|
||||
0x00,0x00,0x10,0x7d
|
||||
|
||||
# GFX1032: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD
|
||||
# GFX1064: v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
|
||||
0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06
|
||||
|
||||
# GFX1032: v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD
|
||||
# GFX1064: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD
|
||||
0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06
|
||||
|
||||
# GFX1032: v_cndmask_b32_e32 v5, 0, v2, vcc_lo
|
||||
# GFX1064: v_cndmask_b32_e32 v5, 0, v2, vcc ;
|
||||
0x80,0x04,0x0a,0x02
|
||||
|
||||
# GFX1032: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
|
||||
# GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ;
|
||||
0x02,0x07,0x02,0x02
|
||||
|
||||
# GFX1032: v_add_co_u32_e64 v2, vcc_lo, s0, v2
|
||||
# GFX1064: v_add_co_u32_e64 v2, vcc, s0, v2
|
||||
0x02,0x6a,0x0f,0xd7,0x00,0x04,0x02,0x00
|
||||
|
||||
# GFX1032: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
|
||||
# GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ;
|
||||
0x03,0x09,0x06,0x50
|
||||
|
||||
# GFX1032: v_sub_co_u32_e64 v2, vcc_lo, s0, v2
|
||||
# GFX1064: v_sub_co_u32_e64 v2, vcc, s0, v2
|
||||
0x02,0x6a,0x10,0xd7,0x00,0x04,0x02,0x00
|
||||
|
||||
# GFX1032: v_subrev_co_u32_e64 v2, vcc_lo, s0, v2
|
||||
# GFX1064: v_subrev_co_u32_e64 v2, vcc, s0, v2
|
||||
0x02,0x6a,0x19,0xd7,0x00,0x04,0x02,0x00
|
||||
|
||||
# GFX1032: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
|
||||
# GFX1064: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ;
|
||||
0x03,0x09,0x06,0x52
|
||||
|
||||
# GFX1032: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
|
||||
# GFX1064: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ;
|
||||
0x80,0x02,0x02,0x54
|
||||
|
||||
# GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
# GFX1064: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06
|
||||
|
||||
# GFX1032: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
# GFX1064: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06
|
||||
|
||||
# GFX1032: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
# GFX1064: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06
|
||||
|
||||
# GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
# GFX1064: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
|
||||
0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e
|
||||
|
||||
# GFX1032: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
# GFX1064: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0x00
|
||||
|
||||
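# FIXME: Disassembles as the invalid v_subrev_u16_dpp, which apparently shares this encoding but does not exist in GFX10, instead of v_add_co_ci_u32_dpp; the checks and encoding below are disabled until this is fixed.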
|
||||
|
||||
# gfx1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
# gfx1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
# 0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00
|
||||
|
||||
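# FIXME: Disassembles as v_mul_lo_u16_dpp instead of v_sub_co_ci_u32_dpp; the checks and encoding below are disabled until this is fixed.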
|
||||
|
||||
# gfx1032: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
# gfx1064: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
# 0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00
|
||||
|
||||
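# FIXME: Disassembles as v_lshlrev_b16_dpp instead of v_subrev_co_ci_u32_dpp; the checks and encoding below are disabled until this is fixed.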
|
||||
|
||||
# gfx1032: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
# gfx1064: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
|
||||
# 0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00
|
||||
|
||||
# GFX1032: v_add_co_u32_e64 v0, s0, v0, v2
|
||||
# GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2
|
||||
0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00
|
||||
|
||||
# GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2
|
||||
# GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
|
||||
0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00
|
||||
|
||||
# GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2
|
||||
# GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2
|
||||
0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00
|
||||
|
||||
# GFX1032: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2
|
||||
# GFX1064: v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
|
||||
0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00
|
||||
|
||||
# GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2
|
||||
# GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2
|
||||
0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00
|
||||
|
||||
# GFX1032: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2
|
||||
# GFX1064: v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
|
||||
0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00
|
||||
|
||||
# GFX1032: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2
|
||||
# GFX1064: v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3]
|
||||
0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00
|
||||
|
||||
# GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo
|
||||
# GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc ;
|
||||
0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01
|
||||
|
||||
# GFX1032: v_div_scale_f32 v2, s2, v0, v0, v2
|
||||
# GFX1064: v_div_scale_f32 v2, s[2:3], v0, v0, v2
|
||||
0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04
|
||||
|
||||
# GFX1032: v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3]
|
||||
# GFX1064: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3]
|
||||
0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04
|
||||
|
||||
# GFX1032: v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3]
|
||||
# GFX1064: v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3]
|
||||
0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04
|
||||
|
||||
# GFX1032: v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3]
|
||||
# GFX1064: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3]
|
||||
0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04
|
||||
|
||||
# GFX1032: v_cmpx_neq_f32_e32 v0, v1
|
||||
# GFX1064: v_cmpx_neq_f32_e32 v0, v1
|
||||
0x00,0x03,0x3a,0x7c
|
||||
|
||||
# GFX1032: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
|
||||
# GFX1064: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
|
||||
0xf9,0x02,0x3a,0x7c,0x00,0x00,0x05,0x06
|
||||
|
||||
# GFX1032: v_cmpx_class_f32_e64 v0, 1
|
||||
# GFX1064: v_cmpx_class_f32_e64 v0, 1
|
||||
0x00,0x00,0x98,0xd4,0x00,0x03,0x01,0x00
|
||||
|
||||
# GFX1032: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
|
||||
# GFX1064: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
|
||||
0xf9,0x02,0x31,0x7d,0x00,0x00,0x05,0x86
|
Loading…
Reference in New Issue
Block a user