mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
AMDGPU: Stop assuming vreg for build_vector
This was causing a variety of test failures when v2i64 is added as a legal type. SIFixSGPRCopies should correctly handle the case of vector inputs to a scalar reg_sequence, so this isn't necessary anymore. This was hiding some deficiencies in how reg_sequence is handled later, but this shouldn't be a problem anymore since the register class copy of a reg_sequence is now done before the reg_sequence. llvm-svn: 251860
This commit is contained in:
parent
2e77ccebb9
commit
456805768c
@ -285,6 +285,38 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
|
||||
return N;
|
||||
}
|
||||
|
||||
static unsigned selectVectorRegClassID(unsigned NumVectorElts, bool UseVGPR) {
|
||||
if (UseVGPR) {
|
||||
switch (NumVectorElts) {
|
||||
case 1:
|
||||
return AMDGPU::VGPR_32RegClassID;
|
||||
case 2:
|
||||
return AMDGPU::VReg_64RegClassID;
|
||||
case 4:
|
||||
return AMDGPU::VReg_128RegClassID;
|
||||
case 8:
|
||||
return AMDGPU::VReg_256RegClassID;
|
||||
case 16:
|
||||
return AMDGPU::VReg_512RegClassID;
|
||||
}
|
||||
}
|
||||
|
||||
switch (NumVectorElts) {
|
||||
case 1:
|
||||
return AMDGPU::SReg_32RegClassID;
|
||||
case 2:
|
||||
return AMDGPU::SReg_64RegClassID;
|
||||
case 4:
|
||||
return AMDGPU::SReg_128RegClassID;
|
||||
case 8:
|
||||
return AMDGPU::SReg_256RegClassID;
|
||||
case 16:
|
||||
return AMDGPU::SReg_512RegClassID;
|
||||
}
|
||||
|
||||
llvm_unreachable("invalid vector size");
|
||||
}
|
||||
|
||||
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
unsigned int Opc = N->getOpcode();
|
||||
if (N->isMachineOpcode()) {
|
||||
@ -318,7 +350,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
EVT EltVT = VT.getVectorElementType();
|
||||
assert(EltVT.bitsEq(MVT::i32));
|
||||
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
|
||||
bool UseVReg = true;
|
||||
bool UseVReg = false;
|
||||
|
||||
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
|
||||
U != E; ++U) {
|
||||
if (!U->isMachineOpcode()) {
|
||||
@ -332,24 +365,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
UseVReg = false;
|
||||
}
|
||||
}
|
||||
switch(NumVectorElts) {
|
||||
case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID :
|
||||
AMDGPU::SReg_32RegClassID;
|
||||
break;
|
||||
case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
|
||||
AMDGPU::SReg_64RegClassID;
|
||||
break;
|
||||
case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
|
||||
AMDGPU::SReg_128RegClassID;
|
||||
break;
|
||||
case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
|
||||
AMDGPU::SReg_256RegClassID;
|
||||
break;
|
||||
case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
|
||||
AMDGPU::SReg_512RegClassID;
|
||||
break;
|
||||
default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
|
||||
}
|
||||
|
||||
RegClassID = selectVectorRegClassID(NumVectorElts, UseVReg);
|
||||
} else {
|
||||
// BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
|
||||
// that adds a 128 bits reg copy when going through TwoAddressInstructions
|
||||
|
@ -239,12 +239,15 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
|
||||
static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
|
||||
|
||||
SmallVector<MachineInstr *, 16> Worklist;
|
||||
|
||||
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
|
||||
BI != BE; ++BI) {
|
||||
|
||||
MachineBasicBlock &MBB = *BI;
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E; ++I) {
|
||||
I != E; ++I) {
|
||||
MachineInstr &MI = *I;
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() readnone
|
||||
|
||||
@ -8,9 +8,22 @@ declare i32 @llvm.r600.read.tidig.x() readnone
|
||||
; scc instead.
|
||||
|
||||
; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0:
|
||||
; SI: v_add_i32
|
||||
; SI: v_addc_u32
|
||||
define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
|
||||
; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x18f, v{{[0-9]+}}
|
||||
; SI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
|
||||
; Builds an i64 from a scalar argument (low half) and a volatile load
; (high half), then adds the constant 399 (0x18f) and stores the result.
define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %s.val) {
  %loaded = load volatile i32, i32 addrspace(1)* %in
  %ins.lo = insertelement <2 x i32> undef, i32 %s.val, i32 0
  %ins.hi = insertelement <2 x i32> %ins.lo, i32 %loaded, i32 1
  %as.i64 = bitcast <2 x i32> %ins.hi to i64
  %sum = add i64 %as.i64, 399
  store i64 %sum, i64 addrspace(1)* %out, align 8
  ret void
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_0:
|
||||
; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x18f
|
||||
; SI: s_addc_u32 {{s[0-9]+}}, 0xf423f, 0
|
||||
define void @s_imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
|
||||
%vec.0 = insertelement <2 x i32> undef, i32 %val, i32 0
|
||||
%vec.1 = insertelement <2 x i32> %vec.0, i32 999999, i32 1
|
||||
%bc = bitcast <2 x i32> %vec.1 to i64
|
||||
@ -22,7 +35,20 @@ define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
|
||||
; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_1:
|
||||
; SI: v_add_i32
|
||||
; SI: v_addc_u32
|
||||
define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
|
||||
; Builds an i64 from a scalar argument (low half) and a volatile load
; (high half), then adds another i64 argument and stores the result.
define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
  %loaded = load volatile i32, i32 addrspace(1)* %in
  %ins.lo = insertelement <2 x i32> undef, i32 %val0, i32 0
  %ins.hi = insertelement <2 x i32> %ins.lo, i32 %loaded, i32 1
  %as.i64 = bitcast <2 x i32> %ins.hi to i64
  %sum = add i64 %as.i64, %val1
  store i64 %sum, i64 addrspace(1)* %out, align 8
  ret void
}
|
||||
|
||||
; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_1:
|
||||
; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; SI: s_addc_u32 {{s[0-9]+}}, 0x1869f, {{s[0-9]+}}
|
||||
define void @s_imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
|
||||
%vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
|
||||
%vec.1 = insertelement <2 x i32> %vec.0, i32 99999, i32 1
|
||||
%bc = bitcast <2 x i32> %vec.1 to i64
|
||||
@ -32,9 +58,9 @@ define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64
|
||||
}
|
||||
|
||||
; Doesn't use constants
|
||||
; FUNC-LABEL @imp_def_vcc_split_i64_add_2
|
||||
; SI: v_add_i32
|
||||
; SI: v_addc_u32
|
||||
; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_2:
|
||||
; SI: v_add_i32_e32 {{v[0-9]+}}, vcc, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
; SI: v_addc_u32_e32 {{v[0-9]+}}, vcc, {{v[0-9]+}}, {{v[0-9]+}}, vcc
|
||||
define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() readnone
|
||||
%gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
|
||||
|
Loading…
Reference in New Issue
Block a user