mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[AArch64][GlobalISel] Select uzp1 and uzp2
Port the shuffle-mask matching for uzp1 and uzp2 from AArch64ISelLowering. Add two custom opcodes, G_UZP1 and G_UZP2, and produce them in the post-legalizer combiner when the shuffle mask matches. Tests: - postlegalizer-combiner-uzp.mir verifies that we create G_UZP1 and G_UZP2. The positive testcases (those that expect a G_UZP1 or G_UZP2) come from neon-perm.ll. - select-uzp.mir verifies that we can select G_UZP1 and G_UZP2. Differential Revision: https://reviews.llvm.org/D81049
This commit is contained in:
parent
16c247c68a
commit
8210c19b3b
@ -29,11 +29,24 @@ def zip : GICombineRule<
|
|||||||
(defs root:$root, zip_matchdata:$matchinfo),
|
(defs root:$root, zip_matchdata:$matchinfo),
|
||||||
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||||
[{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
|
[{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
|
||||||
(apply [{ applyZip(*${root}, ${matchinfo}); }])
|
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
// Match data for the uzp rule: the opcode of the pseudo to emit, which
// matchUZP sets to G_UZP1 or G_UZP2 on success.
def uzp_matchdata : GIDefMatchData<"unsigned">;
|
||||||
|
// Replace a G_SHUFFLE_VECTOR whose mask is accepted by matchUZP with the
// pseudo recorded in the match data, using applyShuffleVectorPseudo.
def uzp : GICombineRule<
|
||||||
|
(defs root:$root, uzp_matchdata:$matchinfo),
|
||||||
|
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
|
||||||
|
[{ return matchUZP(*${root}, MRI, ${matchinfo}); }]),
|
||||||
|
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
|
||||||
|
>;
|
||||||
|
|
||||||
|
// Combines that replace a G_SHUFFLE_VECTOR with a target-specific pseudo
|
||||||
|
// instruction. Grouped so the combiner helper can list them as one entry.
|
||||||
|
def shuffle_vector_pseudos : GICombineGroup<[zip, uzp]>;
|
||||||
|
|
||||||
def AArch64PostLegalizerCombinerHelper
|
def AArch64PostLegalizerCombinerHelper
|
||||||
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
|
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
|
||||||
[erase_undef_store, combines_for_extload, zip]> {
|
[erase_undef_store, combines_for_extload,
|
||||||
|
shuffle_vector_pseudos]> {
|
||||||
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
|
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
|
||||||
}
|
}
|
||||||
|
@ -25,6 +25,20 @@ def G_ADD_LOW : AArch64GenericInstruction {
|
|||||||
let hasSideEffects = 0;
|
let hasSideEffects = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Represents a uzp1 instruction. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks.
//
// The destination and both sources share a single type (type0).
def G_UZP1 : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$v1, type0:$v2);
  // A register-to-register permute has no side effects. State this
  // explicitly, as G_ADD_LOW does, because TableGen conservatively assumes
  // side effects for an instruction with no pattern and no explicit flag.
  let hasSideEffects = 0;
}
|
||||||
|
|
||||||
|
// Represents a uzp2 instruction. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks.
//
// The destination and both sources share a single type (type0).
def G_UZP2 : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$v1, type0:$v2);
  // A register-to-register permute has no side effects. State this
  // explicitly, as G_ADD_LOW does, because TableGen conservatively assumes
  // side effects for an instruction with no pattern and no explicit flag.
  let hasSideEffects = 0;
}
|
||||||
|
|
||||||
// Represents a zip1 instruction. Produced post-legalization from
|
// Represents a zip1 instruction. Produced post-legalization from
|
||||||
// G_SHUFFLE_VECTORs with appropriate masks.
|
// G_SHUFFLE_VECTORs with appropriate masks.
|
||||||
def G_ZIP1 : AArch64GenericInstruction {
|
def G_ZIP1 : AArch64GenericInstruction {
|
||||||
@ -39,5 +53,7 @@ def G_ZIP2 : AArch64GenericInstruction {
|
|||||||
let InOperandList = (ins type0:$v1, type0:$v2);
|
let InOperandList = (ins type0:$v1, type0:$v2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def : GINodeEquiv<G_UZP1, AArch64uzp1>;
|
||||||
|
def : GINodeEquiv<G_UZP2, AArch64uzp2>;
|
||||||
def : GINodeEquiv<G_ZIP1, AArch64zip1>;
|
def : GINodeEquiv<G_ZIP1, AArch64zip1>;
|
||||||
def : GINodeEquiv<G_ZIP2, AArch64zip2>;
|
def : GINodeEquiv<G_ZIP2, AArch64zip2>;
|
||||||
|
@ -28,6 +28,21 @@
|
|||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
|
/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
|
||||||
|
/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
|
||||||
|
static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
|
||||||
|
unsigned &WhichResult) {
|
||||||
|
WhichResult = (M[0] == 0 ? 0 : 1);
|
||||||
|
for (unsigned i = 0; i != NumElts; ++i) {
|
||||||
|
// Skip undef indices.
|
||||||
|
if (M[i] < 0)
|
||||||
|
continue;
|
||||||
|
if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
|
/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
|
||||||
/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
|
/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
|
||||||
static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
|
static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
|
||||||
@ -47,6 +62,23 @@ static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
|
||||||
|
/// a G_UZP1 or G_UZP2 instruction.
|
||||||
|
///
|
||||||
|
/// \param [in] MI - The shuffle vector instruction.
|
||||||
|
/// \param [out] Opc - Either G_UZP1 or G_UZP2 on success.
|
||||||
|
static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||||
|
unsigned &Opc) {
|
||||||
|
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
|
||||||
|
unsigned WhichResult;
|
||||||
|
ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
|
||||||
|
unsigned NumElts = MRI.getType(MI.getOperand(0).getReg()).getNumElements();
|
||||||
|
if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
|
||||||
|
return false;
|
||||||
|
Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
|
static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||||
unsigned &Opc) {
|
unsigned &Opc) {
|
||||||
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
|
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
|
||||||
@ -59,7 +91,9 @@ static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool applyZip(MachineInstr &MI, unsigned Opc) {
|
/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
|
||||||
|
/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
|
||||||
|
static bool applyShuffleVectorPseudo(MachineInstr &MI, unsigned Opc) {
|
||||||
MachineIRBuilder MIRBuilder(MI);
|
MachineIRBuilder MIRBuilder(MI);
|
||||||
MIRBuilder.buildInstr(Opc, {MI.getOperand(0).getReg()},
|
MIRBuilder.buildInstr(Opc, {MI.getOperand(0).getReg()},
|
||||||
{MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
|
{MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
|
||||||
|
146
test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-uzp.mir
Normal file
146
test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-uzp.mir
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
#
|
||||||
|
# Check that we can recognize a shuffle mask for a uzp instruction and produce
|
||||||
|
# a G_UZP1 or G_UZP2 where appropriate.
|
||||||
|
#
|
||||||
|
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: uzp1_v4s32
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.1.entry:
|
||||||
|
liveins: $q0, $q1
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: uzp1_v4s32
|
||||||
|
; CHECK: liveins: $q0, $q1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
|
||||||
|
; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
|
||||||
|
; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
|
||||||
|
; CHECK: RET_ReallyLR implicit $q0
|
||||||
|
%0:_(<4 x s32>) = COPY $q0
|
||||||
|
%1:_(<4 x s32>) = COPY $q1
|
||||||
|
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 2, 4, 6)
|
||||||
|
$q0 = COPY %2(<4 x s32>)
|
||||||
|
RET_ReallyLR implicit $q0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: uzp2_v4s32
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.1.entry:
|
||||||
|
liveins: $q0, $q1
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: uzp2_v4s32
|
||||||
|
; CHECK: liveins: $q0, $q1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
|
||||||
|
; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[UZP2_]]
|
||||||
|
; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
|
||||||
|
; CHECK: RET_ReallyLR implicit $q0
|
||||||
|
%0:_(<4 x s32>) = COPY $q0
|
||||||
|
%1:_(<4 x s32>) = COPY $q1
|
||||||
|
%1:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, 5, 7)
|
||||||
|
$q0 = COPY %1(<4 x s32>)
|
||||||
|
RET_ReallyLR implicit $q0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: no_uzp1
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.1.entry:
|
||||||
|
liveins: $q0, $q1
|
||||||
|
|
||||||
|
; See isUZPMask: Mask[1] is 1, but a uzp1 mask requires 2 * 1 + 0 = 2 there.
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: no_uzp1
|
||||||
|
; CHECK: liveins: $q0, $q1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
|
||||||
|
; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, 6)
|
||||||
|
; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
|
||||||
|
; CHECK: RET_ReallyLR implicit $q0
|
||||||
|
%0:_(<4 x s32>) = COPY $q0
|
||||||
|
%1:_(<4 x s32>) = COPY $q1
|
||||||
|
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 1, 4, 6)
|
||||||
|
$q0 = COPY %2(<4 x s32>)
|
||||||
|
RET_ReallyLR implicit $q0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: no_uzp2
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.1.entry:
|
||||||
|
liveins: $q0, $q1
|
||||||
|
|
||||||
|
; See isUZPMask: Mask[1] is 4, but a uzp2 mask requires 2 * 1 + 1 = 3 there.
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: no_uzp2
|
||||||
|
; CHECK: liveins: $q0, $q1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
|
||||||
|
; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(1, 4, 5, 7)
|
||||||
|
; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
|
||||||
|
; CHECK: RET_ReallyLR implicit $q0
|
||||||
|
%0:_(<4 x s32>) = COPY $q0
|
||||||
|
%1:_(<4 x s32>) = COPY $q1
|
||||||
|
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 4, 5, 7)
|
||||||
|
$q0 = COPY %2(<4 x s32>)
|
||||||
|
RET_ReallyLR implicit $q0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: uzp1_undef
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.1.entry:
|
||||||
|
liveins: $q0, $q1
|
||||||
|
|
||||||
|
; Make sure that we can still produce a uzp1/uzp2 with undef indices.
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: uzp1_undef
|
||||||
|
; CHECK: liveins: $q0, $q1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
|
||||||
|
; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
|
||||||
|
; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
|
||||||
|
; CHECK: RET_ReallyLR implicit $q0
|
||||||
|
%0:_(<4 x s32>) = COPY $q0
|
||||||
|
%1:_(<4 x s32>) = COPY $q1
|
||||||
|
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, -1, 4, 6)
|
||||||
|
$q0 = COPY %2(<4 x s32>)
|
||||||
|
RET_ReallyLR implicit $q0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: uzp2_undef
|
||||||
|
legalized: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.1.entry:
|
||||||
|
liveins: $q0, $q1
|
||||||
|
|
||||||
|
; Make sure that we can still produce a uzp1/uzp2 with undef indices.
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: uzp2_undef
|
||||||
|
; CHECK: liveins: $q0, $q1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
|
||||||
|
; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[UZP2_]]
|
||||||
|
; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
|
||||||
|
; CHECK: RET_ReallyLR implicit $q0
|
||||||
|
%0:_(<4 x s32>) = COPY $q0
|
||||||
|
%1:_(<4 x s32>) = COPY $q1
|
||||||
|
%1:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, -1, 7)
|
||||||
|
$q0 = COPY %1(<4 x s32>)
|
||||||
|
RET_ReallyLR implicit $q0
|
53
test/CodeGen/AArch64/GlobalISel/select-uzp.mir
Normal file
53
test/CodeGen/AArch64/GlobalISel/select-uzp.mir
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
#
|
||||||
|
# Check that we can select G_UZP1 and G_UZP2 via the tablegen importer.
|
||||||
|
#
|
||||||
|
# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: uzp1_v4s32
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.1.entry:
|
||||||
|
liveins: $q0, $q1
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: uzp1_v4s32
|
||||||
|
; CHECK: liveins: $q0, $q1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
|
||||||
|
; CHECK: [[UZP1v4i32_:%[0-9]+]]:fpr128 = UZP1v4i32 [[COPY]], [[COPY1]]
|
||||||
|
; CHECK: $q0 = COPY [[UZP1v4i32_]]
|
||||||
|
; CHECK: RET_ReallyLR implicit $q0
|
||||||
|
%0:fpr(<4 x s32>) = COPY $q0
|
||||||
|
%1:fpr(<4 x s32>) = COPY $q1
|
||||||
|
%2:fpr(<4 x s32>) = G_UZP1 %0, %1
|
||||||
|
$q0 = COPY %2(<4 x s32>)
|
||||||
|
RET_ReallyLR implicit $q0
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
||||||
|
name: uzp2_v4s32
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.1.entry:
|
||||||
|
liveins: $q0, $q1
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: uzp2_v4s32
|
||||||
|
; CHECK: liveins: $q0, $q1
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
|
||||||
|
; CHECK: [[UZP2v4i32_:%[0-9]+]]:fpr128 = UZP2v4i32 [[COPY]], [[COPY1]]
|
||||||
|
; CHECK: $q0 = COPY [[UZP2v4i32_]]
|
||||||
|
; CHECK: RET_ReallyLR implicit $q0
|
||||||
|
%0:fpr(<4 x s32>) = COPY $q0
|
||||||
|
%1:fpr(<4 x s32>) = COPY $q1
|
||||||
|
%2:fpr(<4 x s32>) = G_UZP2 %0, %1
|
||||||
|
$q0 = COPY %2(<4 x s32>)
|
||||||
|
RET_ReallyLR implicit $q0
|
||||||
|
|
||||||
|
...
|
Loading…
Reference in New Issue
Block a user