mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[AMDGPU] Tidy SReg/SGPR definitions using template class
Use a multiclass to consistently define SReg/SGPR/TTMP register classes.
Add missing TTMP registers for 96b, 160b, 192b, 224b.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D105800
This commit is contained in:
parent
54c34b405b
commit
9ee7bab63e
@ -373,6 +373,7 @@ def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
|
|||||||
let isAllocatable = 0;
|
let isAllocatable = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Trap handler TMP 16-bit registers
|
||||||
def TTMP_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
|
def TTMP_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
|
||||||
(add (sequence "TTMP%u_LO16", 0, 15))> {
|
(add (sequence "TTMP%u_LO16", 0, 15))> {
|
||||||
let Size = 16;
|
let Size = 16;
|
||||||
@ -382,11 +383,25 @@ def TTMP_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
|
|||||||
// Trap handler TMP 64-bit registers
|
// Trap handler TMP 64-bit registers
|
||||||
def TTMP_64Regs : SIRegisterTuples<getSubRegs<2>.ret, TTMP_32, 15, 2, 2, "ttmp">;
|
def TTMP_64Regs : SIRegisterTuples<getSubRegs<2>.ret, TTMP_32, 15, 2, 2, "ttmp">;
|
||||||
|
|
||||||
|
// Trap handler TMP 96-bit registers
|
||||||
|
def TTMP_96Regs : SIRegisterTuples<getSubRegs<3>.ret, TTMP_32, 15, 3, 3, "ttmp">;
|
||||||
|
|
||||||
// Trap handler TMP 128-bit registers
|
// Trap handler TMP 128-bit registers
|
||||||
def TTMP_128Regs : SIRegisterTuples<getSubRegs<4>.ret, TTMP_32, 15, 4, 4, "ttmp">;
|
def TTMP_128Regs : SIRegisterTuples<getSubRegs<4>.ret, TTMP_32, 15, 4, 4, "ttmp">;
|
||||||
|
|
||||||
|
// Trap handler TMP 160-bit registers
|
||||||
|
def TTMP_160Regs : SIRegisterTuples<getSubRegs<5>.ret, TTMP_32, 15, 4, 5, "ttmp">;
|
||||||
|
|
||||||
|
// Trap handler TMP 192-bit registers
|
||||||
|
def TTMP_192Regs : SIRegisterTuples<getSubRegs<6>.ret, TTMP_32, 15, 4, 6, "ttmp">;
|
||||||
|
|
||||||
|
// Trap handler TMP 224-bit registers
|
||||||
|
def TTMP_224Regs : SIRegisterTuples<getSubRegs<7>.ret, TTMP_32, 15, 4, 7, "ttmp">;
|
||||||
|
|
||||||
|
// Trap handler TMP 256-bit registers
|
||||||
def TTMP_256Regs : SIRegisterTuples<getSubRegs<8>.ret, TTMP_32, 15, 4, 8, "ttmp">;
|
def TTMP_256Regs : SIRegisterTuples<getSubRegs<8>.ret, TTMP_32, 15, 4, 8, "ttmp">;
|
||||||
|
|
||||||
|
// Trap handler TMP 512-bit registers
|
||||||
def TTMP_512Regs : SIRegisterTuples<getSubRegs<16>.ret, TTMP_32, 15, 4, 16, "ttmp">;
|
def TTMP_512Regs : SIRegisterTuples<getSubRegs<16>.ret, TTMP_32, 15, 4, 16, "ttmp">;
|
||||||
|
|
||||||
class TmpRegTuplesBase<int index, int size,
|
class TmpRegTuplesBase<int index, int size,
|
||||||
@ -693,130 +708,51 @@ def SReg_1 : RegisterClass<"AMDGPU", [i1], 32,
|
|||||||
let isAllocatable = 0;
|
let isAllocatable = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Requires 2 s_mov_b64 to copy
|
multiclass SRegClass<int numRegs, int priority,
|
||||||
let CopyCost = 2 in {
|
list<ValueType> regTypes,
|
||||||
|
SIRegisterTuples regList,
|
||||||
|
SIRegisterTuples ttmpList = regList,
|
||||||
|
int copyCost = !sra(!add(numRegs, 1), 1)> {
|
||||||
|
defvar hasTTMP = !ne(regList, ttmpList);
|
||||||
|
defvar suffix = !cast<string>(!mul(numRegs, 32));
|
||||||
|
defvar sgprName = !strconcat("SGPR_", suffix);
|
||||||
|
defvar ttmpName = !strconcat("TTMP_", suffix);
|
||||||
|
|
||||||
// There are no 3-component scalar instructions, but this is needed
|
let AllocationPriority = priority, CopyCost = copyCost in {
|
||||||
// for symmetry with VGPRs.
|
def "" # sgprName : RegisterClass<"AMDGPU", regTypes, 32, (add regList)> {
|
||||||
def SGPR_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
|
}
|
||||||
(add SGPR_96Regs)> {
|
|
||||||
let AllocationPriority = 14;
|
if hasTTMP then {
|
||||||
|
def "" # ttmpName : RegisterClass<"AMDGPU", regTypes, 32, (add ttmpList)> {
|
||||||
|
let isAllocatable = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def SReg_ # suffix :
|
||||||
|
RegisterClass<"AMDGPU", regTypes, 32,
|
||||||
|
!con(!dag(add, [!cast<RegisterClass>(sgprName)], ["sgpr"]),
|
||||||
|
!if(hasTTMP,
|
||||||
|
!dag(add, [!cast<RegisterClass>(ttmpName)], ["ttmp"]),
|
||||||
|
(add)))> {
|
||||||
|
let isAllocatable = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def SReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32,
|
defm "" : SRegClass<3, 14, [v3i32, v3f32], SGPR_96Regs, TTMP_96Regs>;
|
||||||
(add SGPR_96)> {
|
defm "" : SRegClass<4, 15, [v4i32, v4f32, v2i64], SGPR_128Regs, TTMP_128Regs>;
|
||||||
let AllocationPriority = 14;
|
defm "" : SRegClass<5, 16, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
|
||||||
}
|
defm "" : SRegClass<6, 17, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
|
||||||
|
defm "" : SRegClass<7, 18, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
|
||||||
def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
|
defm "" : SRegClass<8, 19, [v8i32, v8f32, v4i64, v4f64], SGPR_256Regs, TTMP_256Regs>;
|
||||||
(add SGPR_128Regs)> {
|
defm "" : SRegClass<16, 20, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
|
||||||
let AllocationPriority = 15;
|
defm "" : SRegClass<32, 21, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
|
||||||
}
|
|
||||||
|
|
||||||
def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32,
|
|
||||||
(add TTMP_128Regs)> {
|
|
||||||
let isAllocatable = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32,
|
|
||||||
(add SGPR_128, TTMP_128)> {
|
|
||||||
let isAllocatable = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // End CopyCost = 2
|
|
||||||
|
|
||||||
// There are no 5-component scalar instructions, but this is needed
|
|
||||||
// for symmetry with VGPRs.
|
|
||||||
def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
|
|
||||||
(add SGPR_160Regs)> {
|
|
||||||
let AllocationPriority = 16;
|
|
||||||
}
|
|
||||||
|
|
||||||
def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32,
|
|
||||||
(add SGPR_160)> {
|
|
||||||
// FIXME: Should be isAllocatable = 0, but that causes all TableGen-generated
|
|
||||||
// subclasses of SGPR_160 to be marked unallocatable too.
|
|
||||||
// This occurs because SGPR_160 and SReg_160 classes are equivalent in size
|
|
||||||
// meaning their enumeration order is dependent on alphanumeric ordering of
|
|
||||||
// their names. The superclass for inherence is the last one in topological
|
|
||||||
// order (i.e. enumeration order), hence SReg_160 is selected.
|
|
||||||
// Potential workarounds involve renaming SGPR_160, adding another class
|
|
||||||
// which is ordered last and hence used for inheritance, or adding more
|
|
||||||
// registers to SReg_160 to cause it to be moved earlier in the superclass
|
|
||||||
// list.
|
|
||||||
let CopyCost = 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
// There are no 6-component scalar instructions, but this is needed
|
|
||||||
// for symmetry with VGPRs.
|
|
||||||
def SGPR_192 : RegisterClass<"AMDGPU", [v6i32, v6f32, v3i64, v3f64], 32, (add SGPR_192Regs)> {
|
|
||||||
let AllocationPriority = 17;
|
|
||||||
}
|
|
||||||
|
|
||||||
def SReg_192 : RegisterClass<"AMDGPU", [v6i32, v6f32, v3i64, v3f64], 32, (add SGPR_192)> {
|
|
||||||
let isAllocatable = 0;
|
|
||||||
let CopyCost = 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
// There are no 7-component scalar instructions, but this is needed
|
|
||||||
// for symmetry with VGPRs.
|
|
||||||
def SGPR_224 : RegisterClass<"AMDGPU", [v7i32, v7f32], 32, (add SGPR_224Regs)> {
|
|
||||||
let AllocationPriority = 18;
|
|
||||||
}
|
|
||||||
|
|
||||||
def SReg_224 : RegisterClass<"AMDGPU", [v7i32, v7f32], 32, (add SGPR_224)> {
|
|
||||||
let isAllocatable = 0;
|
|
||||||
let CopyCost = 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32, v4i64, v4f64], 32, (add SGPR_256Regs)> {
|
|
||||||
let AllocationPriority = 19;
|
|
||||||
}
|
|
||||||
|
|
||||||
def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32, v4i64, v4f64], 32, (add TTMP_256Regs)> {
|
|
||||||
let isAllocatable = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32, v4i64, v4f64], 32,
|
|
||||||
(add SGPR_256, TTMP_256)> {
|
|
||||||
// Requires 4 s_mov_b64 to copy
|
|
||||||
let CopyCost = 4;
|
|
||||||
let isAllocatable = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32, v8i64, v8f64], 32,
|
|
||||||
(add SGPR_512Regs)> {
|
|
||||||
let AllocationPriority = 20;
|
|
||||||
}
|
|
||||||
|
|
||||||
def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32, v8i64, v8f64], 32,
|
|
||||||
(add TTMP_512Regs)> {
|
|
||||||
let isAllocatable = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32, v8i64, v8f64], 32,
|
|
||||||
(add SGPR_512, TTMP_512)> {
|
|
||||||
// Requires 8 s_mov_b64 to copy
|
|
||||||
let CopyCost = 8;
|
|
||||||
let isAllocatable = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
|
||||||
(add VGPR_32, LDS_DIRECT_CLASS)> {
|
(add VGPR_32, LDS_DIRECT_CLASS)> {
|
||||||
let isAllocatable = 0;
|
let isAllocatable = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32, v16i64, v16f64], 32,
|
|
||||||
(add SGPR_1024Regs)> {
|
|
||||||
let AllocationPriority = 21;
|
|
||||||
}
|
|
||||||
|
|
||||||
def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32, v16i64, v16f64], 32,
|
|
||||||
(add SGPR_1024)> {
|
|
||||||
let CopyCost = 16;
|
|
||||||
let isAllocatable = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Register class for all vector registers (VGPRs + Interpolation Registers)
|
// Register class for all vector registers (VGPRs + Interpolation Registers)
|
||||||
class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
|
class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
|
||||||
RegisterClass<"AMDGPU", regTypes, 32, regList> {
|
RegisterClass<"AMDGPU", regTypes, 32, regList> {
|
||||||
|
@ -8,15 +8,15 @@
|
|||||||
define amdgpu_kernel void @s_input_output_i128() {
|
define amdgpu_kernel void @s_input_output_i128() {
|
||||||
; GFX908-LABEL: name: s_input_output_i128
|
; GFX908-LABEL: name: s_input_output_i128
|
||||||
; GFX908: bb.0 (%ir-block.0):
|
; GFX908: bb.0 (%ir-block.0):
|
||||||
; GFX908: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:SGPR_128 */, def %4
|
; GFX908: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5111818 /* regdef:SGPR_128 */, def %4
|
||||||
; GFX908: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
|
; GFX908: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
|
||||||
; GFX908: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:SGPR_128 */, [[COPY]]
|
; GFX908: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5111817 /* reguse:SGPR_128 */, [[COPY]]
|
||||||
; GFX908: S_ENDPGM 0
|
; GFX908: S_ENDPGM 0
|
||||||
; GFX90A-LABEL: name: s_input_output_i128
|
; GFX90A-LABEL: name: s_input_output_i128
|
||||||
; GFX90A: bb.0 (%ir-block.0):
|
; GFX90A: bb.0 (%ir-block.0):
|
||||||
; GFX90A: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:SGPR_128 */, def %4
|
; GFX90A: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5111818 /* regdef:SGPR_128 */, def %4
|
||||||
; GFX90A: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
|
; GFX90A: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
|
||||||
; GFX90A: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:SGPR_128 */, [[COPY]]
|
; GFX90A: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5111817 /* reguse:SGPR_128 */, [[COPY]]
|
||||||
; GFX90A: S_ENDPGM 0
|
; GFX90A: S_ENDPGM 0
|
||||||
%val = tail call i128 asm sideeffect "; def $0", "=s"()
|
%val = tail call i128 asm sideeffect "; def $0", "=s"()
|
||||||
call void asm sideeffect "; use $0", "s"(i128 %val)
|
call void asm sideeffect "; use $0", "s"(i128 %val)
|
||||||
@ -26,15 +26,15 @@ define amdgpu_kernel void @s_input_output_i128() {
|
|||||||
define amdgpu_kernel void @v_input_output_i128() {
|
define amdgpu_kernel void @v_input_output_i128() {
|
||||||
; GFX908-LABEL: name: v_input_output_i128
|
; GFX908-LABEL: name: v_input_output_i128
|
||||||
; GFX908: bb.0 (%ir-block.0):
|
; GFX908: bb.0 (%ir-block.0):
|
||||||
; GFX908: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4390922 /* regdef:VReg_128 */, def %4
|
; GFX908: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4718602 /* regdef:VReg_128 */, def %4
|
||||||
; GFX908: [[COPY:%[0-9]+]]:vreg_128 = COPY %4
|
; GFX908: [[COPY:%[0-9]+]]:vreg_128 = COPY %4
|
||||||
; GFX908: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4390921 /* reguse:VReg_128 */, [[COPY]]
|
; GFX908: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4718601 /* reguse:VReg_128 */, [[COPY]]
|
||||||
; GFX908: S_ENDPGM 0
|
; GFX908: S_ENDPGM 0
|
||||||
; GFX90A-LABEL: name: v_input_output_i128
|
; GFX90A-LABEL: name: v_input_output_i128
|
||||||
; GFX90A: bb.0 (%ir-block.0):
|
; GFX90A: bb.0 (%ir-block.0):
|
||||||
; GFX90A: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4521994 /* regdef:VReg_128_Align2 */, def %4
|
; GFX90A: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4849674 /* regdef:VReg_128_Align2 */, def %4
|
||||||
; GFX90A: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4
|
; GFX90A: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4
|
||||||
; GFX90A: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4521993 /* reguse:VReg_128_Align2 */, [[COPY]]
|
; GFX90A: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4849673 /* reguse:VReg_128_Align2 */, [[COPY]]
|
||||||
; GFX90A: S_ENDPGM 0
|
; GFX90A: S_ENDPGM 0
|
||||||
%val = tail call i128 asm sideeffect "; def $0", "=v"()
|
%val = tail call i128 asm sideeffect "; def $0", "=v"()
|
||||||
call void asm sideeffect "; use $0", "v"(i128 %val)
|
call void asm sideeffect "; use $0", "v"(i128 %val)
|
||||||
@ -44,15 +44,15 @@ define amdgpu_kernel void @v_input_output_i128() {
|
|||||||
define amdgpu_kernel void @a_input_output_i128() {
|
define amdgpu_kernel void @a_input_output_i128() {
|
||||||
; GFX908-LABEL: name: a_input_output_i128
|
; GFX908-LABEL: name: a_input_output_i128
|
||||||
; GFX908: bb.0 (%ir-block.0):
|
; GFX908: bb.0 (%ir-block.0):
|
||||||
; GFX908: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4325386 /* regdef:AReg_128 */, def %4
|
; GFX908: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4653066 /* regdef:AReg_128 */, def %4
|
||||||
; GFX908: [[COPY:%[0-9]+]]:areg_128 = COPY %4
|
; GFX908: [[COPY:%[0-9]+]]:areg_128 = COPY %4
|
||||||
; GFX908: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4325385 /* reguse:AReg_128 */, [[COPY]]
|
; GFX908: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4653065 /* reguse:AReg_128 */, [[COPY]]
|
||||||
; GFX908: S_ENDPGM 0
|
; GFX908: S_ENDPGM 0
|
||||||
; GFX90A-LABEL: name: a_input_output_i128
|
; GFX90A-LABEL: name: a_input_output_i128
|
||||||
; GFX90A: bb.0 (%ir-block.0):
|
; GFX90A: bb.0 (%ir-block.0):
|
||||||
; GFX90A: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4456458 /* regdef:AReg_128_Align2 */, def %4
|
; GFX90A: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:AReg_128_Align2 */, def %4
|
||||||
; GFX90A: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4
|
; GFX90A: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4
|
||||||
; GFX90A: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4456457 /* reguse:AReg_128_Align2 */, [[COPY]]
|
; GFX90A: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:AReg_128_Align2 */, [[COPY]]
|
||||||
; GFX90A: S_ENDPGM 0
|
; GFX90A: S_ENDPGM 0
|
||||||
%val = call i128 asm sideeffect "; def $0", "=a"()
|
%val = call i128 asm sideeffect "; def $0", "=a"()
|
||||||
call void asm sideeffect "; use $0", "a"(i128 %val)
|
call void asm sideeffect "; use $0", "a"(i128 %val)
|
||||||
|
Loading…
Reference in New Issue
Block a user