1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[AMDGPU] gfx1010 core wave32 changes

Differential Revision: https://reviews.llvm.org/D63204

llvm-svn: 363934
This commit is contained in:
Stanislav Mekhanoshin 2019-06-20 15:08:34 +00:00
parent 222088eed6
commit 2653a95667
32 changed files with 1933 additions and 60 deletions

View File

@ -777,7 +777,7 @@ def FeatureISAVersion10_1_0 : FeatureSet<
FeatureLDSBankCount32,
FeatureDLInsts,
FeatureNSAEncoding,
FeatureWavefrontSize64,
FeatureWavefrontSize32,
FeatureScalarStores,
FeatureScalarAtomics,
FeatureScalarFlatScratchInsts,
@ -795,7 +795,7 @@ def FeatureISAVersion10_1_1 : FeatureSet<
FeatureDot5Insts,
FeatureDot6Insts,
FeatureNSAEncoding,
FeatureWavefrontSize64,
FeatureWavefrontSize32,
FeatureScalarStores,
FeatureScalarAtomics,
FeatureScalarFlatScratchInsts,
@ -812,7 +812,7 @@ def FeatureISAVersion10_1_2 : FeatureSet<
FeatureDot5Insts,
FeatureDot6Insts,
FeatureNSAEncoding,
FeatureWavefrontSize64,
FeatureWavefrontSize32,
FeatureScalarStores,
FeatureScalarAtomics,
FeatureScalarFlatScratchInsts,

View File

@ -50,19 +50,19 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4,
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def AMDGPUIfOp : SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
[SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
>;
def AMDGPUElseOp : SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, OtherVT>]
[SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
>;
def AMDGPULoopOp : SDTypeProfile<0, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>]
[SDTCisVT<0, i1>, SDTCisVT<1, OtherVT>]
>;
def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
[SDTCisVT<0, i1>, SDTCisVT<1, i1>, SDTCisVT<2, i1>]
>;
//===----------------------------------------------------------------------===//

View File

@ -101,6 +101,12 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
return addOperand(Inst, MCOperand::createImm(Imm));
}
// Static decoder callback: forwards the encoded value to
// AMDGPUDisassembler::decodeBoolReg and appends the operand it returns to Inst.
//   Inst    - instruction being built; receives the decoded operand.
//   Val     - raw encoded operand value.
//   Addr    - not used by this callback.
//   Decoder - opaque pointer that is cast to the AMDGPUDisassembler instance.
// Returns whatever addOperand() reports.
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val,
uint64_t Addr, const void *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
return addOperand(Inst, DAsm->decodeBoolReg(Val));
}
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
unsigned Imm, \

View File

@ -946,6 +946,15 @@ public:
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
/// Return the register class of operand \p OpNum of instruction descriptor
/// \p TID, or nullptr when \p OpNum is not a valid operand index for \p TID.
/// The class is looked up through RI from the operand's RegClass id; the
/// \p TRI and \p MF arguments are not consulted by this override.
// NOTE(review): OpInfo[OpNum].RegClass is passed through unchecked - confirm
// RI.getRegClass copes with operands that carry no register class.
const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
const TargetRegisterInfo *TRI,
const MachineFunction &MF)
const override {
// Out-of-range operand index: there is no register class to report.
if (OpNum >= TID.getNumOperands())
return nullptr;
return RI.getRegClass(TID.OpInfo[OpNum].RegClass);
}
void fixImplicitOperands(MachineInstr &MI) const;
};

View File

@ -766,6 +766,15 @@ def VOPDstS64orS32 : BoolRC {
let PrintMethod = "printVOPDst";
}
// SCSrc_i1 is the operand for pseudo instructions only.
// Boolean immediates shall not be exposed to codegen instructions.
// Registers come from SReg_1_XEXEC; the operand is parsed via the BoolReg
// match class and disassembled via the "decodeBoolReg" decoder method.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_REG_IMM_INT32";
let ParserMatchClass = BoolReg;
let DecoderMethod = "decodeBoolReg";
}
// ===----------------------------------------------------------------------===//
// ExpSrc* Special cases for exp src operands which are printed as
// "off" depending on en operand.
@ -804,11 +813,12 @@ def SDWASrc_i16 : SDWASrc<i16>;
def SDWASrc_f32 : SDWASrc<f32>;
def SDWASrc_f16 : SDWASrc<f16>;
def SDWAVopcDst : VOPDstOperand<SReg_64> {
def SDWAVopcDst : BoolRC {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_SDWA_VOPC_DST";
let EncoderMethod = "getSDWAVopcDstEncoding";
let DecoderMethod = "decodeSDWAVopcDst";
let PrintMethod = "printVOPDst";
}
class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
@ -940,11 +950,6 @@ def f32kimm : kimmOperand<i32>;
def KImmFP16MatchClass : KImmMatchClass<16>;
def f16kimm : kimmOperand<i16>;
def VOPDstS64 : VOPDstOperand <SReg_64> {
let PrintMethod = "printVOPDst";
}
class FPInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "RegOrImmWithFP"#opSize#"InputMods";
let ParserMethod = "parseRegOrImmWithFPInputMods";
@ -1237,7 +1242,7 @@ class getVALUDstForVT<ValueType VT> {
!if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
!if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
VOPDstOperand<SReg_64>)))); // else VT == i1
VOPDstS64orS32)))); // else VT == i1
}
// Returns the register class to use for the destination of VOP[12C]
@ -1313,7 +1318,7 @@ class getVOP3SrcForVT<ValueType VT> {
VSrc_f64,
VSrc_b64),
!if(!eq(VT.Value, i1.Value),
SCSrc_i1,
SSrc_i1,
!if(isFP,
!if(!eq(VT.Value, f16.Value),
VSrc_f16,

View File

@ -121,14 +121,14 @@ def WWM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
def ENTER_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins i64imm:$src0)> {
def ENTER_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> {
let Defs = [EXEC];
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
}
def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins SReg_64:$src0)> {
def EXIT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
@ -161,11 +161,11 @@ def S_SUB_U64_PSEUDO : SPseudoInstSI <
>;
def S_ADD_U64_CO_PSEUDO : SPseudoInstSI <
(outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
(outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
>;
def S_SUB_U64_CO_PSEUDO : SPseudoInstSI <
(outs SReg_64:$vdst, VOPDstS64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
(outs SReg_64:$vdst, VOPDstS64orS32:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1)
>;
} // End usesCustomInserter = 1, Defs = [SCC]
@ -233,30 +233,30 @@ let isTerminator = 1 in {
let OtherPredicates = [EnableLateCFGStructurize] in {
def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI <
(outs),
(ins SReg_64:$vcc, brtarget:$target),
(ins SReg_1:$vcc, brtarget:$target),
[(brcond i1:$vcc, bb:$target)]> {
let Size = 12;
}
}
def SI_IF: CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
[(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
(outs SReg_1:$dst), (ins SReg_1:$vcc, brtarget:$target),
[(set i1:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
let Constraints = "";
let Size = 12;
let hasSideEffects = 1;
}
def SI_ELSE : CFPseudoInstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
(outs SReg_1:$dst),
(ins SReg_1:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
let Size = 12;
let hasSideEffects = 1;
}
def SI_LOOP : CFPseudoInstSI <
(outs), (ins SReg_64:$saved, brtarget:$target),
[(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
(outs), (ins SReg_1:$saved, brtarget:$target),
[(AMDGPUloop i1:$saved, bb:$target)], 1, 1> {
let Size = 8;
let isBranch = 1;
let hasSideEffects = 1;
@ -265,8 +265,7 @@ def SI_LOOP : CFPseudoInstSI <
} // End isTerminator = 1
def SI_END_CF : CFPseudoInstSI <
(outs), (ins SReg_64:$saved),
[(int_amdgcn_end_cf i64:$saved)], 1, 1> {
(outs), (ins SReg_1:$saved), [], 1, 1> {
let Size = 4;
let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
@ -276,8 +275,7 @@ def SI_END_CF : CFPseudoInstSI <
}
def SI_IF_BREAK : CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src),
[(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]> {
(outs SReg_1:$dst), (ins SReg_1:$vcc, SReg_1:$src), []> {
let Size = 4;
let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
@ -303,7 +301,7 @@ multiclass PseudoInstKill <dag ins> {
}
}
defm SI_KILL_I1 : PseudoInstKill <(ins SSrc_b64:$src, i1imm:$killvalue)>;
defm SI_KILL_I1 : PseudoInstKill <(ins SCSrc_i1:$src, i1imm:$killvalue)>;
defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>;
let Defs = [EXEC,VCC] in
@ -322,7 +320,7 @@ def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
}
def SI_PS_LIVE : PseudoInstSI <
(outs SReg_64:$dst), (ins),
(outs SReg_1:$dst), (ins),
[(set i1:$dst, (int_amdgcn_ps_live))]> {
let SALU = 1;
}
@ -584,7 +582,7 @@ def : GCNPat<
>;
def : GCNPat<
(AMDGPUelse i64:$src, bb:$target),
(AMDGPUelse i1:$src, bb:$target),
(SI_ELSE $src, $target, 0)
>;

View File

@ -733,8 +733,6 @@ def SSrcOrLds_b32 : RegisterOperand<SRegOrLds_32> {
defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ;
def SCSrc_i1 : RegisterOperand<SReg_64_XEXEC>;
//===----------------------------------------------------------------------===//
// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//

View File

@ -344,7 +344,7 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp
let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
let AsmDPP16 = AsmDPP#"$fi";
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
}
// Write out to vcc or arbitrary SGPR and read in from vcc or
@ -358,7 +358,7 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*
let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
let AsmDPP16 = AsmDPP#"$fi";
let Outs32 = (outs DstRC:$vdst);
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
// Suppress src2 implied by type since the 32-bit encoding uses an
// implicit VCC use.

View File

@ -183,7 +183,7 @@ class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
let HasModifiers = 0;
let HasClamp = 0;
let HasOMod = 0;
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
}
@ -203,7 +203,7 @@ def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
// FIXME: Hack to stop printing _e64
let DstRC = RegisterOperand<VReg_64>;
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
let Asm64 = " $vdst, $sdst, $src0, $src1, $src2$clamp";
}

View File

@ -56,7 +56,7 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt
let Asm32 = "$src0, $src1";
// The destination for 32-bit encoding is implicit.
let HasDst32 = 0;
let Outs64 = (outs VOPDstS64:$sdst);
let Outs64 = (outs VOPDstS64orS32:$sdst);
list<SchedReadWrite> Schedule = sched;
}

View File

@ -22,6 +22,7 @@ define amdgpu_ps float @add3(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: add3:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add3_u32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = add i32 %a, %b
%result = add i32 %x, %c
@ -46,6 +47,7 @@ define amdgpu_ps float @mad_no_add3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; GFX10-LABEL: mad_no_add3:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mad_u32_u24 v0, v0, v1, v4
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_mad_u32_u24 v0, v2, v3, v0
; GFX10-NEXT: ; return to shader part epilog
%a0 = shl i32 %a, 8
@ -85,6 +87,7 @@ define amdgpu_ps float @add3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) {
; GFX10-LABEL: add3_vgpr_b:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add3_u32 v0, s3, s2, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = add i32 %a, %b
%result = add i32 %x, %c
@ -107,6 +110,7 @@ define amdgpu_ps float @add3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: add3_vgpr_all2:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add3_u32 v0, v1, v2, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = add i32 %b, %c
%result = add i32 %a, %x
@ -129,6 +133,7 @@ define amdgpu_ps float @add3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
; GFX10-LABEL: add3_vgpr_bc:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add3_u32 v0, s2, v0, v1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = add i32 %a, %b
%result = add i32 %x, %c
@ -151,6 +156,7 @@ define amdgpu_ps float @add3_vgpr_const(i32 %a, i32 %b) {
; GFX10-LABEL: add3_vgpr_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add3_u32 v0, v0, v1, 16
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = add i32 %a, %b
%result = add i32 %x, 16
@ -175,6 +181,7 @@ define amdgpu_ps <2 x float> @add3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x
; GFX10-LABEL: add3_multiuse_outer:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add3_u32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_mul_lo_u32 v1, v0, v3
; GFX10-NEXT: ; return to shader part epilog
%inner = add i32 %a, %b
@ -202,6 +209,7 @@ define amdgpu_ps <2 x float> @add3_multiuse_inner(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: add3_multiuse_inner:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_nc_u32_e32 v1, v0, v2
; GFX10-NEXT: ; return to shader part epilog
%inner = add i32 %a, %b
@ -240,6 +248,7 @@ define amdgpu_ps float @add3_uniform_vgpr(float inreg %a, float inreg %b, float
; GFX10-NEXT: v_add_f32_e64 v1, s3, 2.0
; GFX10-NEXT: v_add_f32_e64 v2, s2, 1.0
; GFX10-NEXT: v_add_f32_e64 v0, 0x40400000, s4
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v0
; GFX10-NEXT: ; return to shader part epilog

View File

@ -1,8 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; GCN-LABEL: {{^}}add_var_var_i1:
; GCN: s_xor_b64
; GFX9: s_xor_b64
; GFX10: s_xor_b32
define amdgpu_kernel void @add_var_var_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) {
%a = load volatile i1, i1 addrspace(1)* %in0
%b = load volatile i1, i1 addrspace(1)* %in1
@ -12,7 +14,8 @@ define amdgpu_kernel void @add_var_var_i1(i1 addrspace(1)* %out, i1 addrspace(1)
}
; GCN-LABEL: {{^}}add_var_imm_i1:
; GCN: s_not_b64
; GFX9: s_not_b64
; GFX10: s_not_b32
define amdgpu_kernel void @add_var_imm_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) {
%a = load volatile i1, i1 addrspace(1)* %in
%add = add i1 %a, 1
@ -22,7 +25,8 @@ define amdgpu_kernel void @add_var_imm_i1(i1 addrspace(1)* %out, i1 addrspace(1)
; GCN-LABEL: {{^}}add_i1_cf:
; GCN: ; %endif
; GCN: s_not_b64
; GFX9: s_not_b64
; GFX10: s_not_b32
define amdgpu_kernel void @add_i1_cf(i1 addrspace(1)* %out, i1 addrspace(1)* %a, i1 addrspace(1)* %b) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()

View File

@ -22,6 +22,7 @@ define amdgpu_ps float @add_shl(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: add_shl:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_lshl_u32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = add i32 %a, %b
%result = shl i32 %x, %c
@ -45,6 +46,7 @@ define amdgpu_ps float @add_shl_vgpr_c(i32 inreg %a, i32 inreg %b, i32 %c) {
; GFX10-LABEL: add_shl_vgpr_c:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_lshl_u32 v0, s2, s3, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = add i32 %a, %b
%result = shl i32 %x, %c
@ -67,6 +69,7 @@ define amdgpu_ps float @add_shl_vgpr_ac(i32 %a, i32 inreg %b, i32 %c) {
; GFX10-LABEL: add_shl_vgpr_ac:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_lshl_u32 v0, v0, s2, v1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = add i32 %a, %b
%result = shl i32 %x, %c
@ -89,6 +92,7 @@ define amdgpu_ps float @add_shl_vgpr_const(i32 %a, i32 %b) {
; GFX10-LABEL: add_shl_vgpr_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_lshl_u32 v0, v0, v1, 9
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = add i32 %a, %b
%result = shl i32 %x, 9
@ -112,6 +116,7 @@ define amdgpu_ps float @add_shl_vgpr_const_inline_const(i32 %a) {
; GFX10-LABEL: add_shl_vgpr_const_inline_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 9, 0x7e800
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = add i32 %a, 1012
%result = shl i32 %x, 9
@ -138,6 +143,7 @@ define amdgpu_ps float @add_shl_vgpr_inline_const_x2(i32 %a) {
; GFX10-LABEL: add_shl_vgpr_inline_const_x2:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 9, 0x600
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = add i32 %a, 3
%result = shl i32 %x, 9

View File

@ -22,6 +22,7 @@ define amdgpu_ps float @and_or(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: and_or:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_or_b32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = and i32 %a, %b
%result = or i32 %x, %c
@ -46,6 +47,7 @@ define amdgpu_ps float @and_or_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) {
; GFX10-LABEL: and_or_vgpr_b:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_or_b32 v0, s2, v0, s3
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = and i32 %a, %b
%result = or i32 %x, %c
@ -68,6 +70,7 @@ define amdgpu_ps float @and_or_vgpr_ab(i32 %a, i32 %b, i32 inreg %c) {
; GFX10-LABEL: and_or_vgpr_ab:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_or_b32 v0, v0, v1, s2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = and i32 %a, %b
%result = or i32 %x, %c
@ -90,6 +93,7 @@ define amdgpu_ps float @and_or_vgpr_const(i32 %a, i32 %b) {
; GFX10-LABEL: and_or_vgpr_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_or_b32 v0, v0, 4, v1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = and i32 4, %a
%result = or i32 %x, %b
@ -113,6 +117,7 @@ define amdgpu_ps float @and_or_vgpr_const_inline_const(i32 %a) {
; GFX10-LABEL: and_or_vgpr_const_inline_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_or_b32 v0, v0, 20, 0x808
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = and i32 20, %a
%result = or i32 %x, 2056
@ -135,6 +140,7 @@ define amdgpu_ps float @and_or_vgpr_inline_const_x2(i32 %a) {
; GFX10-LABEL: and_or_vgpr_inline_const_x2:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_and_or_b32 v0, v0, 4, 1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = and i32 4, %a
%result = or i32 %x, 1

View File

@ -1,7 +1,7 @@
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=WAVE64 --check-prefix=NOTES %s
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=WAVE64 --check-prefix=NOTES %s
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=WAVE64 --check-prefix=NOTES %s
; run: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX1010 --check-prefix=WAVE32 --check-prefix=NOTES %s
; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX1010 --check-prefix=WAVE32 --check-prefix=NOTES %s
@var = addrspace(1) global float 0.0

View File

@ -1,9 +1,23 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,WAVE64 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,WAVE32 %s
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo14:
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
; GCN: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x3ffc, [[FI]]
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
define amdgpu_kernel void @scratch_buffer_known_high_masklo14() #0 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
%toint = ptrtoint i32 addrspace(5)* %alloca to i32
%masked = and i32 %toint, 16383
store volatile i32 %masked, i32 addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo16:
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
; GCN: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xfffc, [[FI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
define amdgpu_kernel void @scratch_buffer_known_high_masklo16() #0 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
@ -15,8 +29,11 @@ define amdgpu_kernel void @scratch_buffer_known_high_masklo16() #0 {
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo17:
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
; GCN-NOT: [[FI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
; WAVE64-NOT: [[FI]]
; WAVE64: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
; WAVE32: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x1fffc, [[FI]]
; WAVE32: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
define amdgpu_kernel void @scratch_buffer_known_high_masklo17() #0 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
@ -29,7 +46,7 @@ define amdgpu_kernel void @scratch_buffer_known_high_masklo17() #0 {
; GCN-LABEL: {{^}}scratch_buffer_known_high_mask18:
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
; GCN-NOT: [[FI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
define amdgpu_kernel void @scratch_buffer_known_high_mask18() #0 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca

View File

@ -1,4 +1,5 @@
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-insert-skips -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass si-insert-skips -verify-machineinstrs -o - %s | FileCheck -check-prefix=W32 %s
---
# GCN-LABEL: name: and_execz_mov_vccz
@ -318,3 +319,22 @@ body: |
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
S_ENDPGM 0, implicit $scc
...
---
# 32-bit-wave variant of the and_execz_mov_vccz case: vcc_lo is computed as
# exec_lo AND -1 (the -1 is materialised into sgpr0 by S_MOV_B32) and then
# tested with S_CBRANCH_VCCZ. The check lines below expect the S_MOV/S_AND
# pair to be gone and the branch to test EXEC directly (S_CBRANCH_EXECZ).
# W32-LABEL: name: and_execz_mov_vccz_w32
# W32-NOT: S_MOV_
# W32-NOT: S_AND_
# W32: S_CBRANCH_EXECZ %bb.1, implicit $exec
name: and_execz_mov_vccz_w32
body: |
bb.0:
S_NOP 0
bb.1:
S_NOP 0
bb.2:
$sgpr0 = S_MOV_B32 -1
$vcc_lo = S_AND_B32 $exec_lo, killed $sgpr0, implicit-def dead $scc
S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
S_ENDPGM 0
...

View File

@ -1,5 +1,6 @@
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefix=SI --check-prefix=ALL %s
; RUN: opt -S -mcpu=tonga -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefix=CI --check-prefix=ALL %s
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=SI,SICI,ALL %s
; RUN: opt -S -mcpu=tonga -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=CI,SICI,ALL %s
; RUN: opt -S -mcpu=gfx1010 -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck --check-prefixes=GFX10,ALL %s
; SI-NOT: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] undef, align 4
; CI: @promote_alloca_size_63.stack = internal unnamed_addr addrspace(3) global [63 x [5 x i32]] undef, align 4
@ -46,7 +47,8 @@ entry:
ret void
}
; ALL: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1600 x [5 x i32]] undef, align 4
; SICI: @promote_alloca_size_1600.stack = internal unnamed_addr addrspace(3) global [1600 x [5 x i32]] undef, align 4
; GFX10: alloca [5 x i32]
define amdgpu_kernel void @promote_alloca_size_1600(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #2 {
entry:
@ -141,7 +143,9 @@ entry:
}
; ALL-LABEL: @occupancy_6_over(
; ALL: alloca [43 x i8]
; SICI: alloca [43 x i8]
; GFX10-NOT: alloca
define amdgpu_kernel void @occupancy_6_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #5 {
entry:
%stack = alloca [43 x i8], align 4
@ -191,7 +195,9 @@ entry:
}
; ALL-LABEL: @occupancy_8_over(
; ALL: alloca [33 x i8]
; SICI: alloca [33 x i8]
; GFX10-NOT: alloca
define amdgpu_kernel void @occupancy_8_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #6 {
entry:
%stack = alloca [33 x i8], align 4
@ -241,7 +247,9 @@ entry:
}
; ALL-LABEL: @occupancy_9_over(
; ALL: alloca [29 x i8]
; SICI: alloca [29 x i8]
; GFX10-NOT: alloca
define amdgpu_kernel void @occupancy_9_over(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %in) #7 {
entry:
%stack = alloca [29 x i8], align 4

View File

@ -1,6 +1,7 @@
# RUN: llc -march=amdgcn -mcpu=gfx700 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,ADDR64
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,W64-NO-ADDR64
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,W64-NO-ADDR64
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W32
# Test that we correctly legalize VGPR Rsrc operands in MUBUF instructions.
#

View File

@ -22,6 +22,7 @@ define amdgpu_ps float @or3(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: or3:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = or i32 %a, %b
%result = or i32 %x, %c
@ -47,6 +48,7 @@ define amdgpu_ps float @or3_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) {
; GFX10-LABEL: or3_vgpr_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_or3_b32 v0, v0, s2, s3
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = or i32 %a, %b
%result = or i32 %x, %c
@ -69,6 +71,7 @@ define amdgpu_ps float @or3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: or3_vgpr_all2:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_or3_b32 v0, v1, v2, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = or i32 %b, %c
%result = or i32 %a, %x
@ -91,6 +94,7 @@ define amdgpu_ps float @or3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
; GFX10-LABEL: or3_vgpr_bc:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_or3_b32 v0, s2, v0, v1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = or i32 %a, %b
%result = or i32 %x, %c
@ -113,6 +117,7 @@ define amdgpu_ps float @or3_vgpr_const(i32 %a, i32 %b) {
; GFX10-LABEL: or3_vgpr_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_or3_b32 v0, v1, v0, 64
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = or i32 64, %b
%result = or i32 %x, %a

View File

@ -49,6 +49,24 @@ body: |
S_ENDPGM 0
...
# Virtual SGPR %0 carries a preferred-register hint of $sgpr11 and feeds
# V_ADDC_U32_e64 together with $vcc_lo as carry-in/carry-out. The check line
# expects %0 to be placed in $sgpr14 rather than the hinted $sgpr11.
# NOTE(review): the RUN line (and therefore the pass under test) lies outside
# this hunk - presumably the hint is overridden to avoid a conflict between
# $sgpr11 and $vcc_lo; confirm against the top of the test file.
# GCN-LABEL: s11_vs_vcc{{$}}
# GCN: $vgpr0, $vcc_lo = V_ADDC_U32_e64 killed $sgpr14, killed $vgpr0, killed $vcc_lo, 0
---
name: s11_vs_vcc
tracksRegLiveness: true
registers:
- { id: 0, class: sgpr_32, preferred-register: '$sgpr11' }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
$vcc_lo = IMPLICIT_DEF
%2, $vcc_lo = V_ADDC_U32_e64 killed %0, killed %1, killed $vcc_lo, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: s0_vs_s16{{$}}
# GCN: S_AND_B32 killed renamable $sgpr14, $sgpr0,
---

View File

@ -22,6 +22,7 @@ define amdgpu_ps float @shl_add(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: shl_add:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_add_u32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, %b
%result = add i32 %x, %c
@ -46,6 +47,7 @@ define amdgpu_ps float @shl_add_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) {
; GFX10-LABEL: shl_add_vgpr_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_add_u32 v0, v0, s2, s3
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, %b
%result = add i32 %x, %c
@ -68,6 +70,7 @@ define amdgpu_ps float @shl_add_vgpr_all(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: shl_add_vgpr_all:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_add_u32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, %b
%result = add i32 %x, %c
@ -90,6 +93,7 @@ define amdgpu_ps float @shl_add_vgpr_ab(i32 %a, i32 %b, i32 inreg %c) {
; GFX10-LABEL: shl_add_vgpr_ab:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_add_u32 v0, v0, v1, s2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, %b
%result = add i32 %x, %c
@ -112,6 +116,7 @@ define amdgpu_ps float @shl_add_vgpr_const(i32 %a, i32 %b) {
; GFX10-LABEL: shl_add_vgpr_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 3, v1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, 3
%result = add i32 %x, %b

View File

@ -22,6 +22,7 @@ define amdgpu_ps float @shl_or(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: shl_or:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_or_b32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, %b
%result = or i32 %x, %c
@ -45,6 +46,7 @@ define amdgpu_ps float @shl_or_vgpr_c(i32 inreg %a, i32 inreg %b, i32 %c) {
; GFX10-LABEL: shl_or_vgpr_c:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_or_b32 v0, s2, s3, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, %b
%result = or i32 %x, %c
@ -67,6 +69,7 @@ define amdgpu_ps float @shl_or_vgpr_all2(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: shl_or_vgpr_all2:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_or_b32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, %b
%result = or i32 %c, %x
@ -89,6 +92,7 @@ define amdgpu_ps float @shl_or_vgpr_ac(i32 %a, i32 inreg %b, i32 %c) {
; GFX10-LABEL: shl_or_vgpr_ac:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_or_b32 v0, v0, s2, v1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, %b
%result = or i32 %x, %c
@ -111,6 +115,7 @@ define amdgpu_ps float @shl_or_vgpr_const(i32 %a, i32 %b) {
; GFX10-LABEL: shl_or_vgpr_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_or_b32 v0, v0, v1, 6
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, %b
%result = or i32 %x, 6
@ -133,6 +138,7 @@ define amdgpu_ps float @shl_or_vgpr_const2(i32 %a, i32 %b) {
; GFX10-LABEL: shl_or_vgpr_const2:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_or_b32 v0, v0, 6, v1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, 6
%result = or i32 %x, %b
@ -155,6 +161,7 @@ define amdgpu_ps float @shl_or_vgpr_const_scalar1(i32 inreg %a, i32 %b) {
; GFX10-LABEL: shl_or_vgpr_const_scalar1:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_or_b32 v0, s2, 6, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, 6
%result = or i32 %x, %b
@ -177,6 +184,7 @@ define amdgpu_ps float @shl_or_vgpr_const_scalar2(i32 %a, i32 inreg %b) {
; GFX10-LABEL: shl_or_vgpr_const_scalar2:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshl_or_b32 v0, v0, 6, s2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = shl i32 %a, 6
%result = or i32 %x, %b

View File

@ -92,6 +92,7 @@ entry:
; GCN-DAG: s_mov_b32 s1, 1
; GCN-DAG: s_mov_b32 s0, 0
; SI-NEXT: nop 3
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GCN-NEXT: s_buffer_load_dword s0, s[0:3], 0x0
define amdgpu_ps float @smrd_hazard(<4 x i32> inreg %desc) #0 {
main_body:

View File

@ -1,8 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE64 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE32 %s
; GCN-LABEL: {{^}}sub_var_var_i1:
; GCN: s_xor_b64
; WAVE32: s_xor_b32
; WAVE64: s_xor_b64
define amdgpu_kernel void @sub_var_var_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) {
%a = load volatile i1, i1 addrspace(1)* %in0
%b = load volatile i1, i1 addrspace(1)* %in1
@ -12,7 +14,8 @@ define amdgpu_kernel void @sub_var_var_i1(i1 addrspace(1)* %out, i1 addrspace(1)
}
; GCN-LABEL: {{^}}sub_var_imm_i1:
; GCN: s_not_b64
; WAVE32: s_not_b32
; WAVE64: s_not_b64
define amdgpu_kernel void @sub_var_imm_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) {
%a = load volatile i1, i1 addrspace(1)* %in
%sub = sub i1 %a, 1
@ -22,7 +25,8 @@ define amdgpu_kernel void @sub_var_imm_i1(i1 addrspace(1)* %out, i1 addrspace(1)
; GCN-LABEL: {{^}}sub_i1_cf:
; GCN: ; %endif
; GCN: s_not_b64
; WAVE32: s_not_b32
; WAVE64: s_not_b64
define amdgpu_kernel void @sub_i1_cf(i1 addrspace(1)* %out, i1 addrspace(1)* %a, i1 addrspace(1)* %b) {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,7 @@ define amdgpu_ps float @xor3(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: xor3:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xor3_b32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = xor i32 %a, %b
%result = xor i32 %x, %c
@ -33,6 +34,7 @@ define amdgpu_ps float @xor3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) {
; GFX10-LABEL: xor3_vgpr_b:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xor3_b32 v0, s2, v0, s3
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = xor i32 %a, %b
%result = xor i32 %x, %c
@ -50,6 +52,7 @@ define amdgpu_ps float @xor3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: xor3_vgpr_all2:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xor3_b32 v0, v1, v2, v0
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = xor i32 %b, %c
%result = xor i32 %a, %x
@ -67,6 +70,7 @@ define amdgpu_ps float @xor3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
; GFX10-LABEL: xor3_vgpr_bc:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xor3_b32 v0, s2, v0, v1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = xor i32 %a, %b
%result = xor i32 %x, %c
@ -84,6 +88,7 @@ define amdgpu_ps float @xor3_vgpr_const(i32 %a, i32 %b) {
; GFX10-LABEL: xor3_vgpr_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xor3_b32 v0, v0, v1, 16
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = xor i32 %a, %b
%result = xor i32 %x, 16
@ -102,6 +107,7 @@ define amdgpu_ps <2 x float> @xor3_multiuse_outer(i32 %a, i32 %b, i32 %c, i32 %x
; GFX10-LABEL: xor3_multiuse_outer:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xor3_b32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_mul_lo_u32 v1, v0, v3
; GFX10-NEXT: ; return to shader part epilog
%inner = xor i32 %a, %b
@ -123,6 +129,7 @@ define amdgpu_ps <2 x float> @xor3_multiuse_inner(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: xor3_multiuse_inner:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_xor_b32_e32 v1, v0, v2
; GFX10-NEXT: ; return to shader part epilog
%inner = xor i32 %a, %b
@ -151,6 +158,7 @@ define amdgpu_ps float @xor3_uniform_vgpr(float inreg %a, float inreg %b, float
; GFX10-NEXT: v_add_f32_e64 v1, s3, 2.0
; GFX10-NEXT: v_add_f32_e64 v2, s2, 1.0
; GFX10-NEXT: v_add_f32_e64 v0, 0x40400000, s4
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: v_xor_b32_e32 v1, v2, v1
; GFX10-NEXT: v_xor_b32_e32 v0, v1, v0
; GFX10-NEXT: ; return to shader part epilog

View File

@ -22,6 +22,7 @@ define amdgpu_ps float @xor_add(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: xor_add:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xad_u32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = xor i32 %a, %b
%result = add i32 %x, %c
@ -46,6 +47,7 @@ define amdgpu_ps float @xor_add_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) {
; GFX10-LABEL: xor_add_vgpr_a:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xad_u32 v0, v0, s2, s3
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = xor i32 %a, %b
%result = add i32 %x, %c
@ -68,6 +70,7 @@ define amdgpu_ps float @xor_add_vgpr_all(i32 %a, i32 %b, i32 %c) {
; GFX10-LABEL: xor_add_vgpr_all:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xad_u32 v0, v0, v1, v2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = xor i32 %a, %b
%result = add i32 %x, %c
@ -90,6 +93,7 @@ define amdgpu_ps float @xor_add_vgpr_ab(i32 %a, i32 %b, i32 inreg %c) {
; GFX10-LABEL: xor_add_vgpr_ab:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xad_u32 v0, v0, v1, s2
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = xor i32 %a, %b
%result = add i32 %x, %c
@ -112,6 +116,7 @@ define amdgpu_ps float @xor_add_vgpr_const(i32 %a, i32 %b) {
; GFX10-LABEL: xor_add_vgpr_const:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xad_u32 v0, v0, 3, v1
; GFX10-NEXT: ; implicit-def: $vcc_hi
; GFX10-NEXT: ; return to shader part epilog
%x = xor i32 %a, 3
%result = add i32 %x, %b

View File

@ -33,3 +33,13 @@ v_div_fmas_f64 v[5:6], v[1:2], s[2:3], v[3:4]
v_div_fmas_f64 v[5:6], v[1:2], s[2:3], 0x123456
// GFX10-ERR: error: invalid operand (violates constant bus restrictions)
//-----------------------------------------------------------------------------------------
// v_mad_u64_u32 has operands of different sizes.
// When these operands are literals, they are counted as 2 scalar values even if literals are identical.
v_mad_u64_u32 v[5:6], s12, v1, 0x12345678, 0x12345678
// GFX10: v_mad_u64_u32 v[5:6], s12, v1, 0x12345678, 0x12345678 ; encoding: [0x05,0x0c,0x76,0xd5,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12]
v_mad_u64_u32 v[5:6], s12, s1, 0x12345678, 0x12345678
// GFX10-ERR: error: invalid operand (violates constant bus restrictions)

412
test/MC/AMDGPU/wave32.s Normal file
View File

@ -0,0 +1,412 @@
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck -check-prefix=GFX1032 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck -check-prefix=GFX1064 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX1032-ERR %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck -check-prefix=GFX1064-ERR %s
v_cmp_ge_i32_e32 s0, v0
// GFX1032: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]
// GFX1064: v_cmp_ge_i32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]
v_cmp_ge_i32_e32 vcc_lo, s0, v1
// GFX1032: v_cmp_ge_i32_e32 vcc_lo, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d]
// GFX1064-ERR: error: instruction not supported on this GPU
v_cmp_ge_i32_e32 vcc, s0, v2
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_cmp_ge_i32_e32 vcc, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d]
v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD
// GFX1032: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
// GFX1064-ERR: error: invalid operand for instruction
v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
v_cmp_class_f32_e32 vcc_lo, s0, v0
// GFX1032: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
// GFX1064-ERR: error: instruction not supported on this GPU
v_cmp_class_f32_e32 vcc, s0, v0
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_cmp_class_f32_e32 vcc, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
// TODO-GFX10: The following encoding does not match SP3's encoding, which is:
// [0xf9,0x04,0x1e,0x7d,0x01,0x06,0x06,0x06]
v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD
// GFX1032: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
// GFX1064-ERR: error: invalid operand for instruction
// TODO-GFX10: The following encoding does not match SP3's encoding, which is:
// [0xf9,0x04,0x1e,0x7d,0x01,0x06,0x06,0x06]
v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD
// GFX1032: v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06]
// GFX1064-ERR: error: invalid operand for instruction
v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06]
v_cndmask_b32_e32 v1, v2, v3,
// GFX1032: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
// GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02]
v_cndmask_b32_e32 v1, v2, v3, vcc_lo
// GFX1032: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
// GFX1064-ERR: error: instruction not supported on this GPU
v_cndmask_b32_e32 v1, v2, v3, vcc
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02]
v_add_co_u32_e32 v2, vcc_lo, s0, v2
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064-ERR: error: instruction not supported on this GPU
v_add_co_u32_e32 v2, vcc, s0, v2
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064-ERR: error: instruction not supported on this GPU
v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
// GFX1032: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
// GFX1064-ERR: error: instruction not supported on this GPU
v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
v_add_co_ci_u32_e32 v3, v3, v4
// GFX1032: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
// GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
v_sub_co_u32_e32 v2, vcc_lo, s0, v2
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064-ERR: error: instruction not supported on this GPU
v_sub_co_u32_e32 v2, vcc, s0, v2
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064-ERR: error: instruction not supported on this GPU
v_subrev_co_u32_e32 v2, vcc_lo, s0, v2
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064-ERR: error: instruction not supported on this GPU
v_subrev_co_u32_e32 v2, vcc, s0, v2
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064-ERR: error: instruction not supported on this GPU
v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
// GFX1032: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
// GFX1064-ERR: error: instruction not supported on this GPU
v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
v_sub_co_ci_u32_e32 v3, v3, v4
// GFX1032: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
// GFX1064: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
// GFX1032: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
// GFX1064-ERR: error: instruction not supported on this GPU
v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
v_subrev_co_ci_u32_e32 v1, 0, v1
// GFX1032: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
// GFX1064: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
v_add_co_u32_sdwa v0, vcc_lo, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// GFX1032-ERR: error: invalid operand
// GFX1064-ERR: error: invalid operand
v_add_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// GFX1032-ERR: error: instruction not supported
// GFX1064-ERR: error: instruction not supported
v_add_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// GFX1032-ERR: error: not a valid operand
// GFX1064-ERR: error: not a valid operand
v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
// GFX1064-ERR: error: instruction not supported on this GPU
v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
// GFX1064: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
v_sub_co_u32_sdwa v0, vcc_lo, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// GFX1032-ERR: error: invalid operand
// GFX1064-ERR: error: invalid operand
v_sub_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// GFX1032-ERR: error: instruction not supported
// GFX1064-ERR: error: instruction not supported
v_sub_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// GFX1032-ERR: error: not a valid operand
// GFX1064-ERR: error: not a valid operand
v_subrev_co_u32_sdwa v0, vcc_lo, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// GFX1032-ERR: error: invalid operand
// GFX1064-ERR: error: invalid operand
v_subrev_co_u32_sdwa v0, vcc, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// GFX1032-ERR: error: instruction not supported
// GFX1064-ERR: error: instruction not supported
v_subrev_co_u32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
// GFX1032-ERR: error: not a valid operand
// GFX1064-ERR: error: not a valid operand
v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX1032: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
// GFX1064-ERR: error: instruction not supported on this GPU
v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX1032: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
// GFX1064: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX1032: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
// GFX1064-ERR: error: instruction not supported on this GPU
v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX1032: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
// GFX1064: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
// GFX1064: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
// GFX1064-ERR: error: instruction not supported on this GPU
v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
v_add_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032-ERR: error: not a valid operand
// GFX1064-ERR: error: not a valid operand
v_add_co_u32_dpp v5, vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032-ERR: error: not a valid operand
// GFX1064-ERR: error: not a valid operand
v_add_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032-ERR: error: not a valid operand
// GFX1064-ERR: error: not a valid operand
v_add_co_ci_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
// GFX1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
// GFX1064-ERR: error: instruction not supported on this GPU
v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
v_sub_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032-ERR: error: not a valid operand
// GFX1064-ERR: error: not a valid operand
v_sub_co_u32_dpp v5, vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032-ERR: error: not a valid operand
// GFX1064-ERR: error: not a valid operand
v_sub_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032-ERR: error: not a valid operand
// GFX1064-ERR: error: not a valid operand
v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00]
// GFX1064-ERR: error: instruction not supported on this GPU
v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00]
v_subrev_co_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032-ERR: error: not a valid operand
// GFX1064-ERR: error: not a valid operand
v_subrev_co_u32_dpp v5, vcc_lo, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032-ERR: error: not a valid operand
// GFX1064-ERR: error: not a valid operand
v_subrev_co_u32_dpp v5, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032-ERR: error: not a valid operand
// GFX1064-ERR: error: not a valid operand
v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00]
// GFX1064-ERR: error: instruction not supported on this GPU
v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
// GFX1032-ERR: error: instruction not supported on this GPU
// GFX1064: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00]
v_add_co_u32 v0, s0, v0, v2
// GFX1032: v_add_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
// GFX1064-ERR: error: invalid operand for instruction
v_add_co_u32_e64 v0, s0, v0, v2
// GFX1032: v_add_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
// GFX1064-ERR: error: invalid operand for instruction
v_add_co_ci_u32_e64 v4, s0, v1, v5, s2
// GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
// GFX1064-ERR: error: invalid operand for instruction
v_sub_co_u32 v0, s0, v0, v2
// GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
// GFX1064-ERR: error: invalid operand for instruction
v_sub_co_u32_e64 v0, s0, v0, v2
// GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
// GFX1064-ERR: error: invalid operand for instruction
v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2
// GFX1032: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
// GFX1064-ERR: error: invalid operand for instruction
v_subrev_co_u32 v0, s0, v0, v2
// GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
// GFX1064-ERR: error: invalid operand for instruction
v_subrev_co_u32_e64 v0, s0, v0, v2
// GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
// GFX1064-ERR: error: invalid operand for instruction
v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2
// GFX1032: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00]
// GFX1064-ERR: error: invalid operand for instruction
v_add_co_u32 v0, s[0:1], v0, v2
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
v_add_co_u32_e64 v0, s[0:1], v0, v2
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
v_sub_co_u32 v0, s[0:1], v0, v2
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
v_sub_co_u32_e64 v0, s[0:1], v0, v2
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
v_subrev_co_u32 v0, s[0:1], v0, v2
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
v_subrev_co_u32_e64 v0, s[0:1], v0, v2
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00]
v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2
// GFX1032: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2 ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]
// GFX1064-ERR: error: invalid operand for instruction
v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3]
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]
v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo
// GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01]
// GFX1064-ERR: error: invalid operand for instruction
v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01]
v_div_scale_f32 v2, s2, v0, v0, v2
// GFX1032: v_div_scale_f32 v2, s2, v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
// GFX1064-ERR: error: invalid operand for instruction
v_div_scale_f32 v2, s[2:3], v0, v0, v2
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_div_scale_f32 v2, s[2:3], v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3]
// GFX1032: v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3] ; encoding: [0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04]
// GFX1064-ERR: error: invalid operand for instruction
v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3]
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3] ; encoding: [0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04]
v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3]
// GFX1032: v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3] ; encoding: [0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04]
// GFX1064-ERR: error: invalid operand for instruction
v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3]
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3] ; encoding: [0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04]
v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3]
// GFX1032: v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3] ; encoding: [0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04]
// GFX1064-ERR: error: invalid operand for instruction
v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3]
// GFX1032-ERR: error: invalid operand for instruction
// GFX1064: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3] ; encoding: [0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04]
v_cmpx_neq_f32_e32 v0, v1
// GFX1032: v_cmpx_neq_f32_e32 v0, v1 ; encoding: [0x00,0x03,0x3a,0x7c]
// GFX1064: v_cmpx_neq_f32_e32 v0, v1 ; encoding: [0x00,0x03,0x3a,0x7c]
v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
// GFX1032: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x3a,0x7c,0x00,0x00,0x05,0x06]
// GFX1064: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x3a,0x7c,0x00,0x00,0x05,0x06]
v_cmpx_eq_u32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
// GFX1032: v_cmpx_eq_u32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0xa5,0x7d,0x00,0x00,0x05,0x86]
// GFX1064: v_cmpx_eq_u32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0xa5,0x7d,0x00,0x00,0x05,0x86]
v_cmpx_class_f32_e64 v0, 1
// GFX1032: v_cmpx_class_f32_e64 v0, 1 ; encoding: [0x00,0x00,0x98,0xd4,0x00,0x03,0x01,0x00]
// GFX1064: v_cmpx_class_f32_e64 v0, 1 ; encoding: [0x00,0x00,0x98,0xd4,0x00,0x03,0x01,0x00]
v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
// GFX1032: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x31,0x7d,0x00,0x00,0x05,0x86]
// GFX1064: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x31,0x7d,0x00,0x00,0x05,0x86]

View File

@ -8,3 +8,9 @@
# GFX10: s_mov_b32 s105, s104 ; encoding: [0x68,0x03,0xe9,0xbe]
0x68,0x03,0xe9,0xbe
# GFX10: v_cmp_eq_f32_e64 s105, v0, s105
0x69,0x00,0x02,0xd4,0x00,0xd3,0x00,0x00
# GFX10: v_cmp_eq_f32_sdwa s105, v0, s105 src0_sel:DWORD src1_sel:DWORD
0xf9,0xd2,0x04,0x7c,0x00,0xe9,0x06,0x86

View File

@ -0,0 +1,164 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1032 %s
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64,-wavefrontsize32 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1064 %s
# GFX1032: v_cmp_lt_f32_e32 vcc_lo, s2, v4
# GFX1064: v_cmp_lt_f32_e32 vcc, s2, v4
0x02,0x08,0x02,0x7c
# GFX1032: v_cmp_ge_i32_e64 s2, s0, v2
# GFX1064: v_cmp_ge_i32_e64 s[2:3], s0, v2
0x02,0x00,0x86,0xd4,0x00,0x04,0x02,0x00
# GFX1032: v_cmp_ge_i32_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:DWORD
# GFX1064: v_cmp_ge_i32_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:DWORD
0xf9,0x04,0x0c,0x7d,0x00,0x00,0x05,0x06
# GFX1032: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD
# GFX1064: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD
0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06
# GFX1032: v_cmp_class_f32_e32 vcc_lo, s0, v0
# GFX1064: v_cmp_class_f32_e32 vcc, s0, v0
0x00,0x00,0x10,0x7d
# GFX1032: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD
# GFX1064: v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06
# GFX1032: v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD
# GFX1064: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD
0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06
# GFX1032: v_cndmask_b32_e32 v5, 0, v2, vcc_lo
# GFX1064: v_cndmask_b32_e32 v5, 0, v2, vcc ;
0x80,0x04,0x0a,0x02
# GFX1032: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
# GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ;
0x02,0x07,0x02,0x02
# GFX1032: v_add_co_u32_e64 v2, vcc_lo, s0, v2
# GFX1064: v_add_co_u32_e64 v2, vcc, s0, v2
0x02,0x6a,0x0f,0xd7,0x00,0x04,0x02,0x00
# GFX1032: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
# GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ;
0x03,0x09,0x06,0x50
# GFX1032: v_sub_co_u32_e64 v2, vcc_lo, s0, v2
# GFX1064: v_sub_co_u32_e64 v2, vcc, s0, v2
0x02,0x6a,0x10,0xd7,0x00,0x04,0x02,0x00
# GFX1032: v_subrev_co_u32_e64 v2, vcc_lo, s0, v2
# GFX1064: v_subrev_co_u32_e64 v2, vcc, s0, v2
0x02,0x6a,0x19,0xd7,0x00,0x04,0x02,0x00
# GFX1032: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
# GFX1064: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ;
0x03,0x09,0x06,0x52
# GFX1032: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
# GFX1064: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ;
0x80,0x02,0x02,0x54
# GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
# GFX1064: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06
# GFX1032: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
# GFX1064: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06
# GFX1032: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
# GFX1064: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06
# GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
# GFX1064: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e
# GFX1032: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
# GFX1064: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0x00
# FIXME: Disassembles as the invalid v_subrev_u16_dpp (apparently the same encoding, but that instruction does not exist in GFX10) instead of v_add_co_ci_u32_dpp
# gfx1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
# gfx1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
# 0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00
# FIXME: Disassembles as v_mul_lo_u16_dpp instead of v_sub_co_ci_u32_dpp
# gfx1032: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
# gfx1064: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
# 0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00
# FIXME: Disassembles as v_lshlrev_b16_dpp instead of v_subrev_co_ci_u32_dpp
# gfx1032: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
# gfx1064: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
# 0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00
# GFX1032: v_add_co_u32_e64 v0, s0, v0, v2
# GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2
0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00
# GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2
# GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00
# GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2
# GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2
0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00
# GFX1032: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2
# GFX1064: v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00
# GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2
# GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2
0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00
# GFX1032: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2
# GFX1064: v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00
# GFX1032: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2
# GFX1064: v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3]
0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00
# GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo
# GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc ;
0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01
# GFX1032: v_div_scale_f32 v2, s2, v0, v0, v2
# GFX1064: v_div_scale_f32 v2, s[2:3], v0, v0, v2
0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04
# GFX1032: v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3]
# GFX1064: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3]
0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04
# GFX1032: v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3]
# GFX1064: v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3]
0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04
# GFX1032: v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3]
# GFX1064: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3]
0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04
# GFX1032: v_cmpx_neq_f32_e32 v0, v1
# GFX1064: v_cmpx_neq_f32_e32 v0, v1
0x00,0x03,0x3a,0x7c
# GFX1032: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
# GFX1064: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
0xf9,0x02,0x3a,0x7c,0x00,0x00,0x05,0x06
# GFX1032: v_cmpx_class_f32_e64 v0, 1
# GFX1064: v_cmpx_class_f32_e64 v0, 1
0x00,0x00,0x98,0xd4,0x00,0x03,0x01,0x00
# GFX1032: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
# GFX1064: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
0xf9,0x02,0x31,0x7d,0x00,0x00,0x05,0x86