mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[AMDGPU][MC] Added support of 256- and 512-bit tuples of ttmp registers
See bug 35561: https://bugs.llvm.org/show_bug.cgi?id=35561
This patch also affects the implementation of SGPR and VGPR registers, though those changes are cosmetic.
Reviewers: artem.tamazov, arsenm
Differential Revision: https://reviews.llvm.org/D41437
llvm-svn: 321359
This commit is contained in:
parent
7212902956
commit
b8925d0036
@ -695,18 +695,24 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage(
|
||||
IsSGPR = false;
|
||||
Width = 3;
|
||||
} else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
|
||||
assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
|
||||
"trap handler registers should not be used");
|
||||
IsSGPR = true;
|
||||
Width = 4;
|
||||
} else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
|
||||
IsSGPR = false;
|
||||
Width = 4;
|
||||
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
|
||||
assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
|
||||
"trap handler registers should not be used");
|
||||
IsSGPR = true;
|
||||
Width = 8;
|
||||
} else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
|
||||
IsSGPR = false;
|
||||
Width = 8;
|
||||
} else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
|
||||
assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
|
||||
"trap handler registers should not be used");
|
||||
IsSGPR = true;
|
||||
Width = 16;
|
||||
} else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
|
||||
|
@ -1495,6 +1495,8 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
|
||||
case 1: return AMDGPU::TTMP_32RegClassID;
|
||||
case 2: return AMDGPU::TTMP_64RegClassID;
|
||||
case 4: return AMDGPU::TTMP_128RegClassID;
|
||||
case 8: return AMDGPU::TTMP_256RegClassID;
|
||||
case 16: return AMDGPU::TTMP_512RegClassID;
|
||||
}
|
||||
} else if (Is == IS_SGPR) {
|
||||
switch (RegWidth) {
|
||||
@ -1502,8 +1504,8 @@ static int getRegClass(RegisterKind Is, unsigned RegWidth) {
|
||||
case 1: return AMDGPU::SGPR_32RegClassID;
|
||||
case 2: return AMDGPU::SGPR_64RegClassID;
|
||||
case 4: return AMDGPU::SGPR_128RegClassID;
|
||||
case 8: return AMDGPU::SReg_256RegClassID;
|
||||
case 16: return AMDGPU::SReg_512RegClassID;
|
||||
case 8: return AMDGPU::SGPR_256RegClassID;
|
||||
case 16: return AMDGPU::SGPR_512RegClassID;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
|
@ -348,10 +348,12 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
|
||||
case AMDGPU::TTMP_128RegClassID:
|
||||
// ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
|
||||
// this bundle?
|
||||
case AMDGPU::SReg_256RegClassID:
|
||||
// ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
|
||||
case AMDGPU::SGPR_256RegClassID:
|
||||
case AMDGPU::TTMP_256RegClassID:
|
||||
// ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
|
||||
// this bundle?
|
||||
case AMDGPU::SReg_512RegClassID:
|
||||
case AMDGPU::SGPR_512RegClassID:
|
||||
case AMDGPU::TTMP_512RegClassID:
|
||||
shift = 2;
|
||||
break;
|
||||
// ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
|
||||
@ -441,11 +443,11 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
|
||||
}
|
||||
|
||||
MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
|
||||
return createSRegOperand(AMDGPU::SReg_256RegClassID, Val);
|
||||
return decodeDstOp(OPW256, Val);
|
||||
}
|
||||
|
||||
MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
|
||||
return createSRegOperand(AMDGPU::SReg_512RegClassID, Val);
|
||||
return decodeDstOp(OPW512, Val);
|
||||
}
|
||||
|
||||
MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
|
||||
@ -593,6 +595,8 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
|
||||
return SGPR_32RegClassID;
|
||||
case OPW64: return SGPR_64RegClassID;
|
||||
case OPW128: return SGPR_128RegClassID;
|
||||
case OPW256: return SGPR_256RegClassID;
|
||||
case OPW512: return SGPR_512RegClassID;
|
||||
}
|
||||
}
|
||||
|
||||
@ -608,6 +612,8 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
|
||||
return TTMP_32RegClassID;
|
||||
case OPW64: return TTMP_64RegClassID;
|
||||
case OPW128: return TTMP_128RegClassID;
|
||||
case OPW256: return TTMP_256RegClassID;
|
||||
case OPW512: return TTMP_512RegClassID;
|
||||
}
|
||||
}
|
||||
|
||||
@ -659,6 +665,25 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
|
||||
}
|
||||
}
|
||||
|
||||
// Decode the encoded register value `Val` of a 256- or 512-bit wide operand
// into a register MCOperand.
//
// Val is tried against two ranges, in this order:
//   1. Val <= SGPR_MAX: an SGPR tuple of the class returned by
//      getSgprClassId(Width).
//   2. getTTmpIdx(Val) >= 0: a trap-temporary (ttmp) tuple of the class
//      returned by getTtmpClassId(Width).
// Any other value reaches llvm_unreachable below.
//
// Only OPW256 and OPW512 are accepted for Width, and Val must be below 128;
// both preconditions are enforced with assert only.
MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
|
||||
using namespace AMDGPU::EncValues;
|
||||
|
||||
// Preconditions on the encoded value and on the operand width.
assert(Val < 128);
|
||||
assert(Width == OPW256 || Width == OPW512);
|
||||
|
||||
// SGPR range: the lower bound is not compared at run time (see the assert).
if (Val <= SGPR_MAX) {
|
||||
assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
|
||||
return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
|
||||
}
|
||||
|
||||
// Trap-temporary range: a negative index from getTTmpIdx means Val is not a
// ttmp encoding.
int TTmpIdx = getTTmpIdx(Val);
|
||||
if (TTmpIdx >= 0) {
|
||||
return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
|
||||
}
|
||||
|
||||
// NOTE(review): Val values that are neither SGPR nor ttmp encodings end up
// here; confirm callers can never pass such a value.
llvm_unreachable("unknown dst register");
|
||||
}
|
||||
|
||||
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
|
||||
using namespace AMDGPU;
|
||||
|
||||
|
@ -95,6 +95,8 @@ public:
|
||||
OPW32,
|
||||
OPW64,
|
||||
OPW128,
|
||||
OPW256,
|
||||
OPW512,
|
||||
OPW16,
|
||||
OPWV216,
|
||||
OPW_LAST_,
|
||||
@ -110,6 +112,7 @@ public:
|
||||
MCOperand decodeLiteralConstant() const;
|
||||
|
||||
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
|
||||
MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
|
||||
MCOperand decodeSpecialReg32(unsigned Val) const;
|
||||
MCOperand decodeSpecialReg64(unsigned Val) const;
|
||||
|
||||
|
@ -335,13 +335,13 @@ void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
|
||||
} else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) {
|
||||
O << 'v';
|
||||
NumRegs = 8;
|
||||
} else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(RegNo)) {
|
||||
} else if (MRI.getRegClass(AMDGPU::SGPR_256RegClassID).contains(RegNo)) {
|
||||
O << 's';
|
||||
NumRegs = 8;
|
||||
} else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo)) {
|
||||
O << 'v';
|
||||
NumRegs = 16;
|
||||
} else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(RegNo)) {
|
||||
} else if (MRI.getRegClass(AMDGPU::SGPR_512RegClassID).contains(RegNo)) {
|
||||
O << 's';
|
||||
NumRegs = 16;
|
||||
} else {
|
||||
|
@ -7,6 +7,26 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Helpers
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Helper that maps a tuple size (number of 32-bit sub-registers) to the
// matching list of sub-register indices, exposed as the field `ret`.
//
// Sizes 2, 3, 4 and 8 select ret2, ret3, ret4 and ret8 respectively; every
// other value of `size` falls through to the 16-entry list ret16.
class getSubRegs<int size> {
|
||||
list<SubRegIndex> ret2 = [sub0, sub1];
|
||||
list<SubRegIndex> ret3 = [sub0, sub1, sub2];
|
||||
list<SubRegIndex> ret4 = [sub0, sub1, sub2, sub3];
|
||||
list<SubRegIndex> ret8 = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
|
||||
list<SubRegIndex> ret16 = [sub0, sub1, sub2, sub3,
|
||||
sub4, sub5, sub6, sub7,
|
||||
sub8, sub9, sub10, sub11,
|
||||
sub12, sub13, sub14, sub15];
|
||||
|
||||
// Result: nested !if chain keyed on `size`; ret16 is the default branch.
list<SubRegIndex> ret = !if(!eq(size, 2), ret2,
|
||||
!if(!eq(size, 3), ret3,
|
||||
!if(!eq(size, 4), ret4,
|
||||
!if(!eq(size, 8), ret8, ret16))));
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Declarations that describe the SI registers
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -141,19 +161,19 @@ def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
}
|
||||
|
||||
// SGPR 64-bit registers
|
||||
def SGPR_64Regs : RegisterTuples<[sub0, sub1],
|
||||
def SGPR_64Regs : RegisterTuples<getSubRegs<2>.ret,
|
||||
[(add (decimate SGPR_32, 2)),
|
||||
(add (decimate (shl SGPR_32, 1), 2))]>;
|
||||
|
||||
// SGPR 128-bit registers
|
||||
def SGPR_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
|
||||
def SGPR_128Regs : RegisterTuples<getSubRegs<4>.ret,
|
||||
[(add (decimate SGPR_32, 4)),
|
||||
(add (decimate (shl SGPR_32, 1), 4)),
|
||||
(add (decimate (shl SGPR_32, 2), 4)),
|
||||
(add (decimate (shl SGPR_32, 3), 4))]>;
|
||||
|
||||
// SGPR 256-bit registers
|
||||
def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
|
||||
def SGPR_256Regs : RegisterTuples<getSubRegs<8>.ret,
|
||||
[(add (decimate SGPR_32, 4)),
|
||||
(add (decimate (shl SGPR_32, 1), 4)),
|
||||
(add (decimate (shl SGPR_32, 2), 4)),
|
||||
@ -164,8 +184,7 @@ def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
|
||||
(add (decimate (shl SGPR_32, 7), 4))]>;
|
||||
|
||||
// SGPR 512-bit registers
|
||||
def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
|
||||
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
|
||||
def SGPR_512Regs : RegisterTuples<getSubRegs<16>.ret,
|
||||
[(add (decimate SGPR_32, 4)),
|
||||
(add (decimate (shl SGPR_32, 1), 4)),
|
||||
(add (decimate (shl SGPR_32, 2), 4)),
|
||||
@ -190,47 +209,125 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
|
||||
}
|
||||
|
||||
// Trap handler TMP 64-bit registers
|
||||
def TTMP_64Regs : RegisterTuples<[sub0, sub1],
|
||||
def TTMP_64Regs : RegisterTuples<getSubRegs<2>.ret,
|
||||
[(add (decimate TTMP_32, 2)),
|
||||
(add (decimate (shl TTMP_32, 1), 2))]>;
|
||||
|
||||
// Trap handler TMP 128-bit registers
|
||||
def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
|
||||
def TTMP_128Regs : RegisterTuples<getSubRegs<4>.ret,
|
||||
[(add (decimate TTMP_32, 4)),
|
||||
(add (decimate (shl TTMP_32, 1), 4)),
|
||||
(add (decimate (shl TTMP_32, 2), 4)),
|
||||
(add (decimate (shl TTMP_32, 3), 4))]>;
|
||||
|
||||
class TmpRegTuples <string tgt,
|
||||
bit Is64Bit,
|
||||
int Index0,
|
||||
int Index1 = !add(Index0, 1),
|
||||
int Index2 = !add(Index0, !if(Is64Bit, 1, 2)),
|
||||
int Index3 = !add(Index0, !if(Is64Bit, 1, 3)),
|
||||
string name = "ttmp["#Index0#":"#Index3#"]",
|
||||
Register r0 = !cast<Register>("TTMP"#Index0#tgt),
|
||||
Register r1 = !cast<Register>("TTMP"#Index1#tgt),
|
||||
Register r2 = !cast<Register>("TTMP"#Index2#tgt),
|
||||
Register r3 = !cast<Register>("TTMP"#Index3#tgt)> :
|
||||
RegisterWithSubRegs<name, !if(Is64Bit, [r0, r1], [r0, r1, r2, r3])> {
|
||||
let SubRegIndices = !if(Is64Bit, [sub0, sub1], [sub0, sub1, sub2, sub3]);
|
||||
let HWEncoding = r0.HWEncoding;
|
||||
def TTMP_256Regs : RegisterTuples<getSubRegs<8>.ret,
|
||||
[(add (decimate TTMP_32, 4)),
|
||||
(add (decimate (shl TTMP_32, 1), 4)),
|
||||
(add (decimate (shl TTMP_32, 2), 4)),
|
||||
(add (decimate (shl TTMP_32, 3), 4)),
|
||||
(add (decimate (shl TTMP_32, 4), 4)),
|
||||
(add (decimate (shl TTMP_32, 5), 4)),
|
||||
(add (decimate (shl TTMP_32, 6), 4)),
|
||||
(add (decimate (shl TTMP_32, 7), 4))]>;
|
||||
|
||||
def TTMP_512Regs : RegisterTuples<getSubRegs<16>.ret,
|
||||
[(add (decimate TTMP_32, 4)),
|
||||
(add (decimate (shl TTMP_32, 1), 4)),
|
||||
(add (decimate (shl TTMP_32, 2), 4)),
|
||||
(add (decimate (shl TTMP_32, 3), 4)),
|
||||
(add (decimate (shl TTMP_32, 4), 4)),
|
||||
(add (decimate (shl TTMP_32, 5), 4)),
|
||||
(add (decimate (shl TTMP_32, 6), 4)),
|
||||
(add (decimate (shl TTMP_32, 7), 4)),
|
||||
(add (decimate (shl TTMP_32, 8), 4)),
|
||||
(add (decimate (shl TTMP_32, 9), 4)),
|
||||
(add (decimate (shl TTMP_32, 10), 4)),
|
||||
(add (decimate (shl TTMP_32, 11), 4)),
|
||||
(add (decimate (shl TTMP_32, 12), 4)),
|
||||
(add (decimate (shl TTMP_32, 13), 4)),
|
||||
(add (decimate (shl TTMP_32, 14), 4)),
|
||||
(add (decimate (shl TTMP_32, 15), 4))]>;
|
||||
|
||||
// Base class for a ttmp register tuple built from an explicit list of
// sub-registers.
//
//   index   - number of the first register of the tuple.
//   size    - number of registers in the tuple.
//   subRegs - the registers that make up the tuple.
//   indices - sub-register indices; defaults to getSubRegs<size>.ret.
//   index1  - number of the last register (index + size - 1).
//   name    - assembly name, "ttmp[index:index1]".
class TmpRegTuplesBase<int index, int size,
|
||||
list<Register> subRegs,
|
||||
list<SubRegIndex> indices = getSubRegs<size>.ret,
|
||||
int index1 = !add(index, !add(size, -1)),
|
||||
string name = "ttmp["#index#":"#index1#"]"> :
|
||||
RegisterWithSubRegs<name, subRegs> {
|
||||
// The tuple takes the hardware encoding of its first sub-register.
let HWEncoding = subRegs[0].HWEncoding;
|
||||
let SubRegIndices = indices;
|
||||
}
|
||||
|
||||
// Builds a ttmp register tuple of 2, 4 or 8 registers starting at TTMP<index0>.
//
//   tgt    - suffix appended to each register name in the !cast lookups
//            (the visible users pass "_vi" or "_gfx9").
//   size   - number of registers in the tuple. 2 selects [r0, r1], 4 selects
//            [r0..r3], and any other value selects all eight [r0..r7].
//   index0 - number of the first register of the tuple.
//
// index1..index7 and r0..r7 are derived defaults and are not meant to be
// passed by users. For the slots a given size does not use, the index
// collapses to index0 + 1 (index2/index3 when size is 2, index4..index7
// unless size is 8) — presumably so that every !cast still names an existing
// register; TODO confirm.
class TmpRegTuples<string tgt,
|
||||
int size,
|
||||
int index0,
|
||||
int index1 = !add(index0, 1),
|
||||
int index2 = !add(index0, !if(!eq(size, 2), 1, 2)),
|
||||
int index3 = !add(index0, !if(!eq(size, 2), 1, 3)),
|
||||
int index4 = !add(index0, !if(!eq(size, 8), 4, 1)),
|
||||
int index5 = !add(index0, !if(!eq(size, 8), 5, 1)),
|
||||
int index6 = !add(index0, !if(!eq(size, 8), 6, 1)),
|
||||
int index7 = !add(index0, !if(!eq(size, 8), 7, 1)),
|
||||
Register r0 = !cast<Register>("TTMP"#index0#tgt),
|
||||
Register r1 = !cast<Register>("TTMP"#index1#tgt),
|
||||
Register r2 = !cast<Register>("TTMP"#index2#tgt),
|
||||
Register r3 = !cast<Register>("TTMP"#index3#tgt),
|
||||
Register r4 = !cast<Register>("TTMP"#index4#tgt),
|
||||
Register r5 = !cast<Register>("TTMP"#index5#tgt),
|
||||
Register r6 = !cast<Register>("TTMP"#index6#tgt),
|
||||
Register r7 = !cast<Register>("TTMP"#index7#tgt)> :
|
||||
TmpRegTuplesBase<index0, size,
|
||||
// Keep only the sub-registers that belong to a tuple of this size.
!if(!eq(size, 2), [r0, r1],
|
||||
!if(!eq(size, 4), [r0, r1, r2, r3],
|
||||
[r0, r1, r2, r3, r4, r5, r6, r7])),
|
||||
getSubRegs<size>.ret>;
|
||||
|
||||
foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in {
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 1, Index>;
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 1, Index>;
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 2, Index>;
|
||||
}
|
||||
|
||||
foreach Index = {0, 4, 8, 12} in {
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#
|
||||
_TTMP#!add(Index,2)#
|
||||
_TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 0, Index>;
|
||||
_TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 4, Index>;
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#
|
||||
_TTMP#!add(Index,2)#
|
||||
_TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 0, Index>;
|
||||
_TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 4, Index>;
|
||||
}
|
||||
|
||||
foreach Index = {0, 4, 8} in {
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#
|
||||
_TTMP#!add(Index,2)#
|
||||
_TTMP#!add(Index,3)#
|
||||
_TTMP#!add(Index,4)#
|
||||
_TTMP#!add(Index,5)#
|
||||
_TTMP#!add(Index,6)#
|
||||
_TTMP#!add(Index,7)#_vi : TmpRegTuples<"_vi", 8, Index>;
|
||||
def TTMP#Index#_TTMP#!add(Index,1)#
|
||||
_TTMP#!add(Index,2)#
|
||||
_TTMP#!add(Index,3)#
|
||||
_TTMP#!add(Index,4)#
|
||||
_TTMP#!add(Index,5)#
|
||||
_TTMP#!add(Index,6)#
|
||||
_TTMP#!add(Index,7)#_gfx9 : TmpRegTuples<"_gfx9", 8, Index>;
|
||||
}
|
||||
|
||||
def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi :
|
||||
TmpRegTuplesBase<0, 16,
|
||||
[TTMP0_vi, TTMP1_vi, TTMP2_vi, TTMP3_vi,
|
||||
TTMP4_vi, TTMP5_vi, TTMP6_vi, TTMP7_vi,
|
||||
TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi,
|
||||
TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>;
|
||||
|
||||
def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9 :
|
||||
TmpRegTuplesBase<0, 16,
|
||||
[TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9,
|
||||
TTMP4_gfx9, TTMP5_gfx9, TTMP6_gfx9, TTMP7_gfx9,
|
||||
TTMP8_gfx9, TTMP9_gfx9, TTMP10_gfx9, TTMP11_gfx9,
|
||||
TTMP12_gfx9, TTMP13_gfx9, TTMP14_gfx9, TTMP15_gfx9]>;
|
||||
|
||||
|
||||
// VGPR 32-bit registers
|
||||
// i16/f16 only on VI+
|
||||
def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
@ -240,25 +337,25 @@ def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||
}
|
||||
|
||||
// VGPR 64-bit registers
|
||||
def VGPR_64 : RegisterTuples<[sub0, sub1],
|
||||
def VGPR_64 : RegisterTuples<getSubRegs<2>.ret,
|
||||
[(add (trunc VGPR_32, 255)),
|
||||
(add (shl VGPR_32, 1))]>;
|
||||
|
||||
// VGPR 96-bit registers
|
||||
def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
|
||||
def VGPR_96 : RegisterTuples<getSubRegs<3>.ret,
|
||||
[(add (trunc VGPR_32, 254)),
|
||||
(add (shl VGPR_32, 1)),
|
||||
(add (shl VGPR_32, 2))]>;
|
||||
|
||||
// VGPR 128-bit registers
|
||||
def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
|
||||
def VGPR_128 : RegisterTuples<getSubRegs<4>.ret,
|
||||
[(add (trunc VGPR_32, 253)),
|
||||
(add (shl VGPR_32, 1)),
|
||||
(add (shl VGPR_32, 2)),
|
||||
(add (shl VGPR_32, 3))]>;
|
||||
|
||||
// VGPR 256-bit registers
|
||||
def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
|
||||
def VGPR_256 : RegisterTuples<getSubRegs<8>.ret,
|
||||
[(add (trunc VGPR_32, 249)),
|
||||
(add (shl VGPR_32, 1)),
|
||||
(add (shl VGPR_32, 2)),
|
||||
@ -269,8 +366,7 @@ def VGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
|
||||
(add (shl VGPR_32, 7))]>;
|
||||
|
||||
// VGPR 512-bit registers
|
||||
def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
|
||||
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
|
||||
def VGPR_512 : RegisterTuples<getSubRegs<16>.ret,
|
||||
[(add (trunc VGPR_32, 241)),
|
||||
(add (shl VGPR_32, 1)),
|
||||
(add (shl VGPR_32, 2)),
|
||||
@ -368,13 +464,31 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32,
|
||||
|
||||
} // End CopyCost = 2
|
||||
|
||||
def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
|
||||
def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> {
|
||||
let AllocationPriority = 11;
|
||||
}
|
||||
|
||||
def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> {
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
|
||||
def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32,
|
||||
(add SGPR_256, TTMP_256)> {
|
||||
// Requires 4 s_mov_b64 to copy
|
||||
let CopyCost = 4;
|
||||
let AllocationPriority = 11;
|
||||
}
|
||||
|
||||
def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512)> {
|
||||
def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512Regs)> {
|
||||
let AllocationPriority = 12;
|
||||
}
|
||||
|
||||
def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add TTMP_512Regs)> {
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
|
||||
def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32,
|
||||
(add SGPR_512, TTMP_512)> {
|
||||
// Requires 8 s_mov_b64 to copy
|
||||
let CopyCost = 8;
|
||||
let AllocationPriority = 12;
|
||||
|
@ -667,6 +667,10 @@ bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
|
||||
CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
|
||||
CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
|
||||
CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
|
||||
CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
|
||||
CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
|
||||
CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
|
||||
CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
|
||||
}
|
||||
|
||||
#define CASE_CI_VI(node) \
|
||||
|
@ -189,6 +189,48 @@ s_mov_b64 ttmp[14:15], exec
|
||||
// NOSICIVI: error: not a valid operand
|
||||
// GFX9: s_mov_b64 ttmp[14:15], exec ; encoding: [0x7e,0x01,0xfa,0xbe]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Trap Handler related - 8-dword registers
|
||||
// NB: gfx7 doc states that SMRD does not support trap registers for dst
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
s_buffer_load_dwordx8 ttmp[0:7], s[0:3], s0
|
||||
// VI: [0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00]
|
||||
// GFX9: [0x00,0x1b,0x2c,0xc0,0x00,0x00,0x00,0x00]
|
||||
|
||||
s_buffer_load_dwordx8 ttmp[4:11], s[0:3], s0
|
||||
// VI: [0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00]
|
||||
// GFX9: [0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00]
|
||||
|
||||
s_buffer_load_dwordx8 ttmp[8:15], s[0:3], s0
|
||||
// NOSICIVI: error: not a valid operand
|
||||
// GFX9: [0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00]
|
||||
|
||||
s_load_dwordx8 ttmp[0:7], s[0:1], s0
|
||||
// VI: [0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00]
|
||||
// GFX9: [0x00,0x1b,0x0c,0xc0,0x00,0x00,0x00,0x00]
|
||||
|
||||
s_load_dwordx8 ttmp[4:11], s[0:1], s0
|
||||
// VI: [0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00]
|
||||
// GFX9: [0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00]
|
||||
|
||||
s_load_dwordx8 ttmp[8:15], s[0:1], s0
|
||||
// NOSICIVI: error: not a valid operand
|
||||
// GFX9: [0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Trap Handler related - 16-dword registers
|
||||
// NB: gfx7 doc states that SMRD does not support trap registers for dst
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
s_buffer_load_dwordx16 ttmp[0:15], s[0:3], s0
|
||||
// NOSICIVI: error: not a valid operand
|
||||
// GFX9: [0x00,0x1b,0x30,0xc0,0x00,0x00,0x00,0x00]
|
||||
|
||||
s_load_dwordx16 ttmp[0:15], s[0:1], s0
|
||||
// NOSICIVI: error: not a valid operand
|
||||
// GFX9: [0x00,0x1b,0x10,0xc0,0x00,0x00,0x00,0x00]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Trap Handler related - Some specific instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -107,3 +107,35 @@
|
||||
|
||||
# GFX9: buffer_atomic_inc v1, off, ttmp[12:15], 56 glc ; encoding: [0x00,0x40,0x2c,0xe1,0x00,0x01,0x1e,0xb8]
|
||||
0x00,0x40,0x2c,0xe1,0x00,0x01,0x1e,0xb8
|
||||
|
||||
#===----------------------------------------------------------------------===#
|
||||
# Trap Handler related - 8-dword registers
|
||||
#===----------------------------------------------------------------------===#
|
||||
|
||||
# GFX9: s_buffer_load_dwordx8 ttmp[0:7], s[0:3], s0 ; encoding: [0x00,0x1b,0x2c,0xc0,0x00,0x00,0x00,0x00]
|
||||
0x00,0x1b,0x2c,0xc0,0x00,0x00,0x00,0x00
|
||||
|
||||
# GFX9: s_buffer_load_dwordx8 ttmp[4:11], s[0:3], s0 ; encoding: [0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00]
|
||||
0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00
|
||||
|
||||
# GFX9: s_buffer_load_dwordx8 ttmp[8:15], s[0:3], s0 ; encoding: [0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00]
|
||||
0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00
|
||||
|
||||
# GFX9: s_load_dwordx8 ttmp[0:7], s[0:1], s0 ; encoding: [0x00,0x1b,0x0c,0xc0,0x00,0x00,0x00,0x00]
|
||||
0x00,0x1b,0x0c,0xc0,0x00,0x00,0x00,0x00
|
||||
|
||||
# GFX9: s_load_dwordx8 ttmp[4:11], s[0:1], s0 ; encoding: [0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00]
|
||||
0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00
|
||||
|
||||
# GFX9: s_load_dwordx8 ttmp[8:15], s[0:1], s0 ; encoding: [0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00]
|
||||
0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00
|
||||
|
||||
#===----------------------------------------------------------------------===#
|
||||
# Trap Handler related - 16-dword registers
|
||||
#===----------------------------------------------------------------------===#
|
||||
|
||||
# GFX9: s_buffer_load_dwordx16 ttmp[0:15], s[0:3], s0 ; encoding: [0x00,0x1b,0x30,0xc0,0x00,0x00,0x00,0x00]
|
||||
0x00,0x1b,0x30,0xc0,0x00,0x00,0x00,0x00
|
||||
|
||||
# GFX9: s_load_dwordx16 ttmp[0:15], s[0:1], s0 ; encoding: [0x00,0x1b,0x10,0xc0,0x00,0x00,0x00,0x00]
|
||||
0x00,0x1b,0x10,0xc0,0x00,0x00,0x00,0x00
|
||||
|
@ -107,3 +107,19 @@
|
||||
|
||||
# VI: buffer_atomic_inc v1, off, ttmp[8:11], 56 glc ; encoding: [0x00,0x40,0x2c,0xe1,0x00,0x01,0x1e,0xb8]
|
||||
0x00,0x40,0x2c,0xe1,0x00,0x01,0x1e,0xb8
|
||||
|
||||
#===----------------------------------------------------------------------===#
|
||||
# Trap Handler related - 8-dword registers
|
||||
#===----------------------------------------------------------------------===#
|
||||
|
||||
# VI: s_buffer_load_dwordx8 ttmp[0:7], s[0:3], s0 ; encoding: [0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00]
|
||||
0x00,0x1c,0x2c,0xc0,0x00,0x00,0x00,0x00
|
||||
|
||||
# VI: s_buffer_load_dwordx8 ttmp[4:11], s[0:3], s0 ; encoding: [0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00]
|
||||
0x00,0x1d,0x2c,0xc0,0x00,0x00,0x00,0x00
|
||||
|
||||
# VI: s_load_dwordx8 ttmp[0:7], s[0:1], s0 ; encoding: [0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00]
|
||||
0x00,0x1c,0x0c,0xc0,0x00,0x00,0x00,0x00
|
||||
|
||||
# VI: s_load_dwordx8 ttmp[4:11], s[0:1], s0 ; encoding: [0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00]
|
||||
0x00,0x1d,0x0c,0xc0,0x00,0x00,0x00,0x00
|
||||
|
Loading…
x
Reference in New Issue
Block a user