mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
[AArch64][GlobalISel] Implement selection for G_UNMERGE of vectors to vectors.
This reuses the existing G_EXTRACT_VECTOR_ELT selection support to extract the subvectors.
Differential Revision: https://reviews.llvm.org/D59390
llvm-svn: 356213
This commit is contained in:
parent
90869d8494
commit
6da5735cc0
@ -93,6 +93,8 @@ private:
|
||||
bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
|
||||
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
|
||||
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
|
||||
bool selectSplitVectorUnmerge(MachineInstr &I,
|
||||
MachineRegisterInfo &MRI) const;
|
||||
|
||||
unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
|
||||
MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
|
||||
@ -102,6 +104,10 @@ private:
|
||||
MachineInstr *emitVectorConcat(Optional<unsigned> Dst, unsigned Op1,
|
||||
unsigned Op2,
|
||||
MachineIRBuilder &MIRBuilder) const;
|
||||
MachineInstr *emitExtractVectorElt(Optional<unsigned> DstReg,
|
||||
const RegisterBank &DstRB, LLT ScalarTy,
|
||||
unsigned VecReg, unsigned LaneIdx,
|
||||
MachineIRBuilder &MIRBuilder) const;
|
||||
|
||||
ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
|
||||
|
||||
@ -1870,6 +1876,68 @@ static bool getConstantValueForReg(unsigned Reg, MachineRegisterInfo &MRI,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Emit the instruction(s) that extract lane \p LaneIdx, treated as a value of
/// type \p ScalarTy, from the vector held in \p VecReg.
///
/// \param DstReg     Register that receives the extracted value. When no
///                   value is supplied, a fresh virtual register of the class
///                   chosen for \p ScalarTy on \p DstRB is created.
/// \param DstRB      Register bank of the destination.
/// \param ScalarTy   Type of the value being extracted; its size in bits
///                   selects the lane-copy opcode and subregister index.
/// \param VecReg     Source vector register.
/// \param LaneIdx    Index of the lane to extract.
/// \param MIRBuilder Builder used to place the new instructions.
///
/// \returns the instruction defining the extracted value (a subregister COPY
/// for lane 0, a lane copy otherwise), or nullptr if a lane-copy opcode or a
/// register class could not be determined, or if widening the source failed.
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    Optional<unsigned> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
    unsigned VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // The width of the extracted value decides both the lane-copy opcode and
  // the subregister index used for the lane-0 shortcut.
  unsigned LaneCopyOpc = 0;
  unsigned SubRegIdx = 0;
  const bool FoundLaneCopy =
      getLaneCopyOpcode(LaneCopyOpc, SubRegIdx, ScalarTy.getSizeInBits());
  if (!FoundLaneCopy) {
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
    return nullptr;
  }

  // Register class for the extracted value.
  const TargetRegisterClass *ResultRC =
      getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
  if (!ResultRC) {
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
    return nullptr;
  }

  // Register class for the source vector. It is only checked for existence
  // here; bail out if the source cannot be classified.
  const RegisterBank &SourceRB = *RBI.getRegBank(VecReg, MRI, TRI);
  const LLT SourceTy = MRI.getType(VecReg);
  const TargetRegisterClass *SourceRC =
      getRegClassForTypeOnBank(SourceTy, SourceRB, RBI, true);
  if (!SourceRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return nullptr;
  }

  // Use the caller's destination if one was given, otherwise make one.
  const unsigned ResultReg =
      DstReg.hasValue() ? *DstReg : MRI.createVirtualRegister(ResultRC);

  // Lane 0 needs no lane copy: a COPY from the matching subregister of the
  // source is enough.
  if (LaneIdx == 0) {
    auto SubRegCopy =
        BuildMI(MIRBuilder.getMBB(), MIRBuilder.getInsertPt(),
                MIRBuilder.getDL(), TII.get(TargetOpcode::COPY), ResultReg)
            .addUse(VecReg, 0, SubRegIdx);
    RBI.constrainGenericRegister(ResultReg, *ResultRC, MRI);
    return &*SubRegCopy;
  }

  // Lane copies read a 128-bit register. A narrower source is first widened
  // to FPR128 (implicit def plus subregister insert), and the lane copy then
  // reads the widened register instead.
  unsigned LaneSourceReg = VecReg;
  const unsigned SourceBits = SourceTy.getSizeInBits();
  if (SourceBits != 128) {
    MachineInstr *Widened = emitScalarToVector(
        SourceBits, &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!Widened)
      return nullptr;
    LaneSourceReg = Widened->getOperand(0).getReg();
  }

  MachineInstr *LaneCopy =
      MIRBuilder.buildInstr(LaneCopyOpc, {ResultReg}, {LaneSourceReg})
          .addImm(LaneIdx);
  constrainSelectedInstRegOperands(*LaneCopy, TII, TRI, RBI);

  // Also constrain the destination register to the class chosen above.
  RBI.constrainGenericRegister(ResultReg, *ResultRC, MRI);
  return LaneCopy;
}
|
||||
|
||||
bool AArch64InstructionSelector::selectExtractElt(
|
||||
MachineInstr &I, MachineRegisterInfo &MRI) const {
|
||||
assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
|
||||
@ -1878,7 +1946,7 @@ bool AArch64InstructionSelector::selectExtractElt(
|
||||
const LLT NarrowTy = MRI.getType(DstReg);
|
||||
const unsigned SrcReg = I.getOperand(1).getReg();
|
||||
const LLT WideTy = MRI.getType(SrcReg);
|
||||
|
||||
(void)WideTy;
|
||||
assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
|
||||
"source register size too small!");
|
||||
assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
|
||||
@ -1897,63 +1965,44 @@ bool AArch64InstructionSelector::selectExtractElt(
|
||||
if (!getConstantValueForReg(LaneIdxOp.getReg(), MRI, LaneIdx))
|
||||
return false;
|
||||
|
||||
unsigned CopyOpc = 0;
|
||||
unsigned ExtractSubReg = 0;
|
||||
if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) {
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
|
||||
const TargetRegisterClass *DstRC =
|
||||
getRegClassForTypeOnBank(NarrowTy, DstRB, RBI, true);
|
||||
if (!DstRC) {
|
||||
LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
|
||||
const TargetRegisterClass *SrcRC =
|
||||
getRegClassForTypeOnBank(WideTy, SrcRB, RBI, true);
|
||||
if (!SrcRC) {
|
||||
LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// The register that we're going to copy into.
|
||||
unsigned InsertReg = SrcReg;
|
||||
MachineIRBuilder MIRBuilder(I);
|
||||
|
||||
// If the lane index is 0, we just use a subregister COPY.
|
||||
if (LaneIdx == 0) {
|
||||
unsigned CopyTo = I.getOperand(0).getReg();
|
||||
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
|
||||
CopyTo)
|
||||
.addUse(SrcReg, 0, ExtractSubReg);
|
||||
RBI.constrainGenericRegister(CopyTo, *DstRC, MRI);
|
||||
I.eraseFromParent();
|
||||
return true;
|
||||
const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
|
||||
MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
|
||||
LaneIdx, MIRBuilder);
|
||||
if (!Extract)
|
||||
return false;
|
||||
|
||||
I.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool AArch64InstructionSelector::selectSplitVectorUnmerge(
|
||||
MachineInstr &I, MachineRegisterInfo &MRI) const {
|
||||
unsigned NumElts = I.getNumOperands() - 1;
|
||||
unsigned SrcReg = I.getOperand(NumElts).getReg();
|
||||
const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
|
||||
const LLT SrcTy = MRI.getType(SrcReg);
|
||||
|
||||
assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
|
||||
if (SrcTy.getSizeInBits() > 128) {
|
||||
LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Lane copies require 128-bit wide registers. If we're dealing with an
|
||||
// unpacked vector, then we need to move up to that width. Insert an implicit
|
||||
// def and a subregister insert to get us there.
|
||||
if (WideTy.getSizeInBits() != 128) {
|
||||
MachineInstr *ScalarToVector = emitScalarToVector(
|
||||
WideTy.getSizeInBits(), &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
|
||||
if (!ScalarToVector)
|
||||
MachineIRBuilder MIB(I);
|
||||
|
||||
// We implement a split vector operation by treating the sub-vectors as
|
||||
// scalars and extracting them.
|
||||
const RegisterBank &DstRB =
|
||||
*RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
|
||||
for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
|
||||
unsigned Dst = I.getOperand(OpIdx).getReg();
|
||||
MachineInstr *Extract =
|
||||
emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
|
||||
if (!Extract)
|
||||
return false;
|
||||
InsertReg = ScalarToVector->getOperand(0).getReg();
|
||||
}
|
||||
|
||||
MachineInstr *LaneCopyMI =
|
||||
MIRBuilder.buildInstr(CopyOpc, {DstReg}, {InsertReg}).addImm(LaneIdx);
|
||||
constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
|
||||
|
||||
// Make sure that we actually constrain the initial copy.
|
||||
RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
|
||||
|
||||
I.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
@ -1984,11 +2033,8 @@ bool AArch64InstructionSelector::selectUnmergeValues(
|
||||
assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
|
||||
"source register size too small!");
|
||||
|
||||
// TODO: Handle unmerging into vectors.
|
||||
if (!NarrowTy.isScalar()) {
|
||||
LLVM_DEBUG(dbgs() << "Vector-to-vector unmerges not supported yet.\n");
|
||||
return false;
|
||||
}
|
||||
if (!NarrowTy.isScalar())
|
||||
return selectSplitVectorUnmerge(I, MRI);
|
||||
|
||||
// Choose a lane copy opcode and subregister based off of the size of the
|
||||
// vector's elements.
|
||||
|
@ -19,6 +19,14 @@
|
||||
ret <8 x half> %a
|
||||
}
|
||||
|
||||
define <2 x float> @test_vecsplit_2v2s32_v4s32(<4 x float> %a) {
|
||||
ret <2 x float> undef
|
||||
}
|
||||
|
||||
define <2 x half> @test_vecsplit_2v2s16_v4s16(<4 x half> %a) {
|
||||
ret <2 x half> undef
|
||||
}
|
||||
|
||||
...
|
||||
---
|
||||
name: test_v2s64_unmerge
|
||||
@ -152,3 +160,51 @@ body: |
|
||||
$q0 = COPY %1(<8 x s16>)
|
||||
RET_ReallyLR implicit $q0
|
||||
...
|
||||
---
|
||||
name: test_vecsplit_2v2s32_v4s32
|
||||
alignment: 2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
liveins: $q0
|
||||
; CHECK-LABEL: name: test_vecsplit_2v2s32_v4s32
|
||||
; CHECK: liveins: $q0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]].dsub
|
||||
; CHECK: [[CPYi64_:%[0-9]+]]:fpr64 = CPYi64 [[COPY]], 1
|
||||
; CHECK: $d0 = COPY [[COPY1]]
|
||||
; CHECK: $d1 = COPY [[CPYi64_]]
|
||||
; CHECK: RET_ReallyLR implicit $d0
|
||||
%0:fpr(<4 x s32>) = COPY $q0
|
||||
%1:fpr(<2 x s32>), %2:fpr(<2 x s32>) = G_UNMERGE_VALUES %0(<4 x s32>)
|
||||
$d0 = COPY %1(<2 x s32>)
|
||||
$d1 = COPY %2(<2 x s32>)
|
||||
RET_ReallyLR implicit $d0
|
||||
...
|
||||
---
|
||||
name: test_vecsplit_2v2s16_v4s16
|
||||
alignment: 2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
liveins: $d0
|
||||
; CHECK-LABEL: name: test_vecsplit_2v2s16_v4s16
|
||||
; CHECK: liveins: $d0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]].ssub
|
||||
; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
|
||||
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
|
||||
; CHECK: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[INSERT_SUBREG]], 1
|
||||
; CHECK: $s0 = COPY [[COPY1]]
|
||||
; CHECK: $s1 = COPY [[CPYi32_]]
|
||||
; CHECK: RET_ReallyLR implicit $s0
|
||||
%0:fpr(<4 x s16>) = COPY $d0
|
||||
%1:fpr(<2 x s16>), %2:fpr(<2 x s16>) = G_UNMERGE_VALUES %0(<4 x s16>)
|
||||
$s0 = COPY %1(<2 x s16>)
|
||||
$s1 = COPY %2(<2 x s16>)
|
||||
RET_ReallyLR implicit $s0
|
||||
...
|
||||
|
Loading…
Reference in New Issue
Block a user