mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
AMDGPU/GlobalISel: RegBankSelect for amdgcn.writelane
llvm-svn: 364808
This commit is contained in:
parent
bd2d6c8925
commit
33e923886e
@ -158,7 +158,25 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic(
|
||||
const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
|
||||
return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
|
||||
}
|
||||
case Intrinsic::amdgcn_writelane: {
|
||||
static const OpRegBankEntry<4> Table[4] = {
|
||||
// Perfectly legal.
|
||||
{ { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
|
||||
|
||||
// Need readfirstlane of first op
|
||||
{ { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
|
||||
|
||||
// Need readfirstlane of second op
|
||||
{ { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
|
||||
|
||||
// Need readfirstlane of both ops
|
||||
{ { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
|
||||
};
|
||||
|
||||
// dst, source value, lane index, VGPR input value
|
||||
const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
|
||||
return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table));
|
||||
}
|
||||
default:
|
||||
return RegisterBankInfo::getInstrAlternativeMappings(MI);
|
||||
}
|
||||
@ -764,6 +782,17 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
|
||||
MI.getOperand(OpIdx).setReg(SGPR);
|
||||
}
|
||||
|
||||
// For cases where only a single copy is inserted for matching register banks.
|
||||
// Replace the register in the instruction operand
|
||||
static void substituteSimpleCopyRegs(
|
||||
const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx) {
|
||||
SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(OpIdx));
|
||||
if (!SrcReg.empty()) {
|
||||
assert(SrcReg.size() == 1);
|
||||
OpdMapper.getMI().getOperand(OpIdx).setReg(SrcReg[0]);
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||
const OperandsMapper &OpdMapper) const {
|
||||
MachineInstr &MI = OpdMapper.getMI();
|
||||
@ -982,16 +1011,23 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||
return;
|
||||
}
|
||||
case Intrinsic::amdgcn_readlane: {
|
||||
SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(2));
|
||||
|
||||
if (!SrcReg.empty()) {
|
||||
assert(SrcReg.size() == 1);
|
||||
MI.getOperand(2).setReg(SrcReg[0]);
|
||||
}
|
||||
substituteSimpleCopyRegs(OpdMapper, 2);
|
||||
|
||||
assert(empty(OpdMapper.getVRegs(0)));
|
||||
assert(empty(OpdMapper.getVRegs(3)));
|
||||
|
||||
// Make sure the index is an SGPR. It doesn't make sense to run this in a
|
||||
// waterfall loop, so assume it's a uniform value.
|
||||
constrainOpWithReadfirstlane(MI, MRI, 3); // Index
|
||||
return;
|
||||
}
|
||||
case Intrinsic::amdgcn_writelane: {
|
||||
assert(empty(OpdMapper.getVRegs(0)));
|
||||
assert(empty(OpdMapper.getVRegs(2)));
|
||||
assert(empty(OpdMapper.getVRegs(3)));
|
||||
|
||||
substituteSimpleCopyRegs(OpdMapper, 4); // VGPR input val
|
||||
constrainOpWithReadfirstlane(MI, MRI, 2); // Source value
|
||||
constrainOpWithReadfirstlane(MI, MRI, 3); // Index
|
||||
return;
|
||||
}
|
||||
@ -1664,6 +1700,23 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::amdgcn_writelane: {
|
||||
unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
|
||||
unsigned SrcReg = MI.getOperand(2).getReg();
|
||||
unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
|
||||
unsigned SrcBank = getRegBankID(SrcReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
|
||||
unsigned IdxReg = MI.getOperand(3).getReg();
|
||||
unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
|
||||
unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
|
||||
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
|
||||
|
||||
// These 2 must be SGPRs, but accept VGPRs. Readfirstlane will be inserted
|
||||
// to legalize.
|
||||
OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
|
||||
OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
|
||||
OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -0,0 +1,98 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
|
||||
|
||||
---
|
||||
# All three writelane inputs are copied from SGPRs. The CHECK lines expect the
# first two to stay in the sgpr bank and only the last input to be copied into
# a vgpr before the intrinsic.
name: writelane_sss
|
||||
legalized: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1, $sgpr2
|
||||
; CHECK-LABEL: name: writelane_sss
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY3]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = COPY $sgpr2
|
||||
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
...
|
||||
|
||||
---
|
||||
# The first two writelane inputs come from SGPRs and the last from a VGPR. The
# CHECK lines expect the operands to be used as-is, with no extra copy or
# readfirstlane inserted.
name: writelane_ssv
|
||||
legalized: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1, $vgpr0
|
||||
; CHECK-LABEL: name: writelane_ssv
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $sgpr1
|
||||
%2:_(s32) = COPY $vgpr0
|
||||
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
...
|
||||
|
||||
---
|
||||
# The first writelane input comes from a VGPR. The CHECK lines expect it to be
# routed through V_READFIRSTLANE_B32, while the SGPR second input and the VGPR
# last input are used unchanged.
name: writelane_vsv
|
||||
legalized: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0, $vgpr1
|
||||
; CHECK-LABEL: name: writelane_vsv
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]], [[COPY1]](s32), [[COPY2]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $sgpr0
|
||||
%2:_(s32) = COPY $vgpr1
|
||||
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
...
|
||||
|
||||
---
# All three writelane inputs come from VGPRs. The CHECK lines expect the first
# two to each be routed through V_READFIRSTLANE_B32, while the last input is
# used unchanged. liveins lists every physical register the body copies from,
# including $vgpr2.
name: writelane_vvv
legalized: true

body: |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2
    ; CHECK-LABEL: name: writelane_vvv
    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
    ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
    ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
    ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[V_READFIRSTLANE_B32_]], [[V_READFIRSTLANE_B32_1]], [[COPY2]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = COPY $vgpr2
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
...
|
||||
|
||||
---
|
||||
# The second writelane input comes from a VGPR. The CHECK lines expect it to be
# routed through V_READFIRSTLANE_B32, while the SGPR first input and the VGPR
# last input are used unchanged.
name: writelane_svv
|
||||
legalized: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0, $vgpr1
|
||||
; CHECK-LABEL: name: writelane_svv
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]], [[COPY2]](s32)
|
||||
%0:_(s32) = COPY $sgpr0
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
%2:_(s32) = COPY $vgpr1
|
||||
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.writelane), %0, %1, %2
|
||||
...
|
Loading…
x
Reference in New Issue
Block a user