mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 18:42:46 +02:00
[GlobalISel] Implement splitting of G_SHUFFLE_VECTOR.
This is a port from the DAG legalization. We're still missing some of the canonicalizations of shuffles, but it's a start. Differential Revision: https://reviews.llvm.org/D102828
This commit is contained in:
parent
06613cf382
commit
d5383816bc
@ -328,6 +328,9 @@ public:
|
||||
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI,
|
||||
unsigned TypeIdx, LLT NarrowTy);
|
||||
|
||||
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx,
|
||||
LLT NarrowTy);
|
||||
|
||||
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
|
||||
LLT NarrowTy);
|
||||
|
@ -23,7 +23,9 @@
|
||||
#include "llvm/CodeGen/TargetFrameLowering.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetLowering.h"
|
||||
#include "llvm/CodeGen/TargetOpcodes.h"
|
||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
@ -4244,11 +4246,154 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
|
||||
return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
|
||||
GISEL_VECREDUCE_CASES_NONSEQ
|
||||
return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
|
||||
case G_SHUFFLE_VECTOR:
|
||||
return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
|
||||
default:
|
||||
return UnableToLegalize;
|
||||
}
|
||||
}
|
||||
|
||||
/// Legalize a G_SHUFFLE_VECTOR whose result type has too many elements by
/// splitting it into two half-width results that are concatenated back into
/// the original destination register.
///
/// Both sources are split in half, giving four half-width candidate inputs.
/// For each half of the result:
///   * if its mask elements reference at most two of the four inputs, a
///     half-width G_SHUFFLE_VECTOR of those inputs is emitted;
///   * if it references no input at all, the half is an undef value;
///   * otherwise each element is extracted individually and the half is
///     assembled with a G_BUILD_VECTOR.
///
/// \param MI       The G_SHUFFLE_VECTOR to split. Erased on success.
/// \param TypeIdx  Type index being legalized; only index 0 is handled.
/// \param NarrowTy Requested narrow type. The incoming value is ignored and
///                 replaced by "destination type with half the elements",
///                 because only a split into two is implemented.
/// \returns Legalized on success, UnableToLegalize if the shuffle is not in
///          the supported form (see the early exits below).
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
    MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  LLT DstTy = MRI.getType(DstReg);
  LLT Src1Ty = MRI.getType(Src1Reg);
  LLT Src2Ty = MRI.getType(Src2Reg);

  // The shuffle should be canonicalized by now, i.e. both sources have the
  // same type as the destination.
  if (DstTy != Src1Ty || DstTy != Src2Ty)
    return UnableToLegalize;

  const unsigned NumDstElts = DstTy.getNumElements();
  if (!isPowerOf2_32(NumDstElts))
    return UnableToLegalize;

  // Halving a two-element vector does not leave a vector type behind, while
  // everything below queries the element count of the half-width type and
  // emits half-width vector shuffles. Refuse instead of computing with a
  // meaningless element count.
  if (NumDstElts < 4)
    return UnableToLegalize;

  // We only support splitting a shuffle into 2, so adjust NarrowTy
  // accordingly. Further legalization attempts will be needed to split
  // further.
  NarrowTy = DstTy.changeNumElements(NumDstElts / 2);
  const unsigned NewElts = NarrowTy.getNumElements();

  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
  // The four half-width inputs, in the order the original mask indexes them:
  // low/high half of source 1, then low/high half of source 2.
  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
                        SplitSrc2Regs[1]};

  Register Hi, Lo;

  // If Lo or Hi uses elements from at most two of the four input vectors, then
  // express it as a vector shuffle of those two inputs. Otherwise extract the
  // input elements by hand and construct the Lo/Hi output using a
  // BUILD_VECTOR.
  for (unsigned High = 0; High < 2; ++High) {
    Register &Output = High ? Hi : Lo;

    // Build a shuffle mask for the output, discovering on the fly which
    // input vectors to use as shuffle operands (recorded in InputUsed).
    // If building a suitable shuffle vector proves too hard, then bail
    // out with UseBuildVector set.
    SmallVector<int, 16> Ops;
    unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
    const unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      // The mask element. This indexes into the input.
      int Idx = Mask[FirstMaskIdx + MaskOffset];

      // The input vector this mask element indexes into. A negative (undef)
      // mask element becomes a huge unsigned value and lands out of range.
      const unsigned Input = static_cast<unsigned>(Idx) / NewElts;

      if (Input >= array_lengthof(Inputs)) {
        // The mask element does not index into any input vector.
        Ops.push_back(-1);
        continue;
      }

      // Turn the index into an offset from the start of the input vector.
      Idx -= Input * NewElts;

      // Find or create a shuffle vector operand to hold this input.
      unsigned OpNo;
      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
          // This input vector is already an operand.
          break;
        }
        if (InputUsed[OpNo] == -1U) {
          // Create a new operand for this input vector.
          InputUsed[OpNo] = Input;
          break;
        }
      }

      if (OpNo >= array_lengthof(InputUsed)) {
        // More than two input vectors used! Give up on trying to create a
        // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
        UseBuildVector = true;
        break;
      }

      // Add the mask index for the new shuffle vector.
      Ops.push_back(Idx + OpNo * NewElts);
    }

    if (UseBuildVector) {
      LLT EltTy = NarrowTy.getElementType();
      SmallVector<Register, 16> SVOps;

      // Extract the input elements by hand.
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        // The mask element. This indexes into the input.
        int Idx = Mask[FirstMaskIdx + MaskOffset];

        // The input vector this mask element indexes into.
        const unsigned Input = static_cast<unsigned>(Idx) / NewElts;

        if (Input >= array_lengthof(Inputs)) {
          // The mask element is "undef" or indexes off the end of the input.
          SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
          continue;
        }

        // Turn the index into an offset from the start of the input vector.
        Idx -= Input * NewElts;

        // Extract the vector element by hand.
        SVOps.push_back(MIRBuilder
                            .buildExtractVectorElement(
                                EltTy, Inputs[Input],
                                MIRBuilder.buildConstant(LLT::scalar(32), Idx))
                            .getReg(0));
      }

      // Construct the Lo/Hi output using a G_BUILD_VECTOR.
      Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
    } else if (InputUsed[0] == -1U) {
      // No input vectors were used! The result is undefined.
      Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
    } else {
      Register Op0 = Inputs[InputUsed[0]];
      // If only one input was used, use an undefined vector for the other.
      Register Op1 = InputUsed[1] == -1U
                         ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
                         : Inputs[InputUsed[1]];
      // At least one input vector was used. Create a new shuffle vector.
      Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
    }
  }

  MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
  MI.eraseFromParent();
  return Legalized;
}
|
||||
|
||||
LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
|
||||
MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
|
||||
unsigned Opc = MI.getOpcode();
|
||||
|
@ -136,3 +136,79 @@ body: |
|
||||
RET_ReallyLR implicit $d0, implicit $d1
|
||||
|
||||
...
|
||||
---
|
||||
name: oversize_shuffle_v4i64
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $q0, $q1, $q2, $q3, $x0
|
||||
|
||||
; CHECK-LABEL: name: oversize_shuffle_v4i64
|
||||
; CHECK: liveins: $q0, $q1, $q2, $q3, $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0
|
||||
; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY1]](<2 x s64>), [[COPY2]], shufflemask(1, 2)
|
||||
; CHECK: [[SHUF1:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY3]](<2 x s64>), [[COPY]], shufflemask(1, 2)
|
||||
; CHECK: G_STORE [[SHUF]](<2 x s64>), [[COPY4]](p0) :: (store 16, align 32)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s64)
|
||||
; CHECK: G_STORE [[SHUF1]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into unknown-address + 16)
|
||||
; CHECK: RET_ReallyLR
|
||||
%3:_(<2 x s64>) = COPY $q0
|
||||
%4:_(<2 x s64>) = COPY $q1
|
||||
%0:_(<4 x s64>) = G_CONCAT_VECTORS %3(<2 x s64>), %4(<2 x s64>)
|
||||
%5:_(<2 x s64>) = COPY $q2
|
||||
%6:_(<2 x s64>) = COPY $q3
|
||||
%1:_(<4 x s64>) = G_CONCAT_VECTORS %5(<2 x s64>), %6(<2 x s64>)
|
||||
%2:_(p0) = COPY $x0
|
||||
%7:_(<4 x s64>) = G_SHUFFLE_VECTOR %0(<4 x s64>), %1, shufflemask(3, 4, 7, 0)
|
||||
G_STORE %7(<4 x s64>), %2(p0) :: (store 32)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
---
|
||||
name: oversize_shuffle_v8i32_build_vector
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $q0, $q1, $q2, $q3, $x0
|
||||
|
||||
; CHECK-LABEL: name: oversize_shuffle_v8i32_build_vector
|
||||
; CHECK: liveins: $q0, $q1, $q2, $q3, $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
|
||||
; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
|
||||
; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
|
||||
; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
|
||||
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64)
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
|
||||
; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C2]](s64)
|
||||
; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
|
||||
; CHECK: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY3]](<4 x s32>), [[C3]](s64)
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32)
|
||||
; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY1]](<4 x s32>), [[COPY]], shufflemask(2, 6, 5, 3)
|
||||
; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](p0) :: (store 16, align 32)
|
||||
; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C4]](s64)
|
||||
; CHECK: G_STORE [[SHUF]](<4 x s32>), [[PTR_ADD]](p0) :: (store 16 into unknown-address + 16)
|
||||
; CHECK: RET_ReallyLR
|
||||
%3:_(<4 x s32>) = COPY $q0
|
||||
%4:_(<4 x s32>) = COPY $q1
|
||||
%0:_(<8 x s32>) = G_CONCAT_VECTORS %3(<4 x s32>), %4(<4 x s32>)
|
||||
%5:_(<4 x s32>) = COPY $q2
|
||||
%6:_(<4 x s32>) = COPY $q3
|
||||
%1:_(<8 x s32>) = G_CONCAT_VECTORS %5(<4 x s32>), %6(<4 x s32>)
|
||||
%2:_(p0) = COPY $x0
|
||||
%7:_(<8 x s32>) = G_SHUFFLE_VECTOR %0(<8 x s32>), %1, shufflemask(0, 5, 10, 15, 6, 2, 1, 7)
|
||||
G_STORE %7(<8 x s32>), %2(p0) :: (store 32)
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
|
Loading…
Reference in New Issue
Block a user