1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[AArch64][GlobalISel] Select uzp1 and uzp2

Port the shuffle-mask matching for uzp1 and uzp2 from AArch64ISelLowering.

Add two custom opcodes: G_UZP1 and G_UZP2.

Produce them in the post-legalizer combiner when the mask checks out.

Tests:

- postlegalizer-combiner-uzp.mir verifies that we create G_UZP1 and G_UZP2.
The testcases that check that we create them come from neon-perm.ll.

- select-uzp.mir verifies that we can select G_UZP1 and G_UZP2.

Differential Revision: https://reviews.llvm.org/D81049
This commit is contained in:
Jessica Paquette 2020-06-02 09:30:04 -07:00
parent 16c247c68a
commit 8210c19b3b
5 changed files with 265 additions and 3 deletions

View File

@ -29,11 +29,24 @@ def zip : GICombineRule<
(defs root:$root, zip_matchdata:$matchinfo), (defs root:$root, zip_matchdata:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root, (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
[{ return matchZip(*${root}, MRI, ${matchinfo}); }]), [{ return matchZip(*${root}, MRI, ${matchinfo}); }]),
(apply [{ applyZip(*${root}, ${matchinfo}); }]) (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
>; >;
// Match data for the uzp rule: the pseudo opcode selected by matchUZP
// (G_UZP1 or G_UZP2), passed on to the apply step.
def uzp_matchdata : GIDefMatchData<"unsigned">;
// Rewrites a G_SHUFFLE_VECTOR whose mask matchUZP accepts into the pseudo
// opcode stored in $matchinfo.
def uzp : GICombineRule<
(defs root:$root, uzp_matchdata:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
[{ return matchUZP(*${root}, MRI, ${matchinfo}); }]),
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
>;
// Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
// instruction. Groups the zip and uzp rules so they can be referenced as a
// single entry in a combiner helper's rule list.
def shuffle_vector_pseudos : GICombineGroup<[zip, uzp]>;
def AArch64PostLegalizerCombinerHelper def AArch64PostLegalizerCombinerHelper
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper", : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
[erase_undef_store, combines_for_extload, zip]> { [erase_undef_store, combines_for_extload,
shuffle_vector_pseudos]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule"; let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
} }

View File

@ -25,6 +25,20 @@ def G_ADD_LOW : AArch64GenericInstruction {
let hasSideEffects = 0; let hasSideEffects = 0;
} }
// Represents an uzp1 instruction. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks.
//
// Takes two vectors of the same type and produces one result of that type.
def G_UZP1 : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$v1, type0:$v2);
  // A register-to-register permute has no side effects. State this explicitly
  // so the instruction is not conservatively treated as having unmodelled
  // side effects.
  let hasSideEffects = 0;
}
// Represents an uzp2 instruction. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks.
//
// Takes two vectors of the same type and produces one result of that type.
def G_UZP2 : AArch64GenericInstruction {
  let OutOperandList = (outs type0:$dst);
  let InOperandList = (ins type0:$v1, type0:$v2);
  // A register-to-register permute has no side effects. State this explicitly
  // so the instruction is not conservatively treated as having unmodelled
  // side effects.
  let hasSideEffects = 0;
}
// Represents a zip1 instruction. Produced post-legalization from // Represents a zip1 instruction. Produced post-legalization from
// G_SHUFFLE_VECTORs with appropriate masks. // G_SHUFFLE_VECTORs with appropriate masks.
def G_ZIP1 : AArch64GenericInstruction { def G_ZIP1 : AArch64GenericInstruction {
@ -39,5 +53,7 @@ def G_ZIP2 : AArch64GenericInstruction {
let InOperandList = (ins type0:$v1, type0:$v2); let InOperandList = (ins type0:$v1, type0:$v2);
} }
// Declare G_UZP1/G_UZP2 equivalent to the AArch64uzp1/AArch64uzp2 nodes.
// NOTE(review): presumably this is what lets the existing selection patterns
// for those nodes be imported for GlobalISel - confirm against the importer.
def : GINodeEquiv<G_UZP1, AArch64uzp1>;
def : GINodeEquiv<G_UZP2, AArch64uzp2>;
def : GINodeEquiv<G_ZIP1, AArch64zip1>; def : GINodeEquiv<G_ZIP1, AArch64zip1>;
def : GINodeEquiv<G_ZIP2, AArch64zip2>; def : GINodeEquiv<G_ZIP2, AArch64zip2>;

View File

@ -28,6 +28,21 @@
using namespace llvm; using namespace llvm;
/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult
/// (0 selects G_UZP1, 1 selects G_UZP2).
///
/// A mask matches when every defined index satisfies
/// M[i] == 2 * i + WhichResult. Undef (negative) indices match any position.
///
/// \param M           The shuffle mask; the first \p NumElts entries are read.
/// \param NumElts     Number of mask entries to check.
/// \param WhichResult Set to 0 or 1. Only meaningful when true is returned.
/// \return true if \p M is a uzp1/uzp2 mask.
static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
                      unsigned &WhichResult) {
  // Pick the half from the first *defined* index rather than from M[0]. An
  // undef M[0] says nothing about which half is wanted, and assuming UZP2 in
  // that case would needlessly reject masks such as <undef, 2, 4, 6>.
  // When every index is undef, keep the previous answer (UZP2).
  WhichResult = 1;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (M[i] < 0)
      continue;
    WhichResult = (static_cast<unsigned>(M[i]) == 2 * i) ? 0 : 1;
    break;
  }

  for (unsigned i = 0; i != NumElts; ++i) {
    // Skip undef indices.
    if (M[i] < 0)
      continue;
    if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
      return false;
  }
  return true;
}
/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts. /// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult. /// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
static bool isZipMask(ArrayRef<int> M, unsigned NumElts, static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
@ -47,6 +62,23 @@ static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
return true; return true;
} }
/// Check whether the G_SHUFFLE_VECTOR \p MI has a mask that corresponds to a
/// G_UZP1 or G_UZP2.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [in] MRI - Used to look up the type of the destination register.
/// \param [out] Opc - Either G_UZP1 or G_UZP2 on success; untouched otherwise.
/// \return true if \p MI can be replaced with a G_UZP1 or G_UZP2.
static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
                     unsigned &Opc) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  // Operand 3 carries the mask; operand 0 is the destination register, whose
  // element count is the number of mask entries to check.
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  const unsigned EltCount =
      MRI.getType(MI.getOperand(0).getReg()).getNumElements();
  unsigned Half;
  if (isUZPMask(Mask, EltCount, Half)) {
    // Half == 0 means the uzp1 form, anything else the uzp2 form.
    if (Half == 0)
      Opc = AArch64::G_UZP1;
    else
      Opc = AArch64::G_UZP2;
    return true;
  }
  return false;
}
static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI, static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
unsigned &Opc) { unsigned &Opc) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
@ -59,7 +91,9 @@ static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
return true; return true;
} }
static bool applyZip(MachineInstr &MI, unsigned Opc) { /// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
static bool applyShuffleVectorPseudo(MachineInstr &MI, unsigned Opc) {
MachineIRBuilder MIRBuilder(MI); MachineIRBuilder MIRBuilder(MI);
MIRBuilder.buildInstr(Opc, {MI.getOperand(0).getReg()}, MIRBuilder.buildInstr(Opc, {MI.getOperand(0).getReg()},
{MI.getOperand(1).getReg(), MI.getOperand(2).getReg()}); {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});

View File

@ -0,0 +1,146 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# Check that we can recognize a shuffle mask for a uzp instruction and produce
# a G_UZP1 or G_UZP2 where appropriate.
#
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
...
---
name: uzp1_v4s32
legalized: true
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $q0, $q1
; The mask (0, 2, 4, 6) satisfies Mask[i] == 2 * i + 0 for every i, so the
; shuffle should be rewritten to a G_UZP1 of the two inputs.
; CHECK-LABEL: name: uzp1_v4s32
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
; CHECK: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 2, 4, 6)
$q0 = COPY %2(<4 x s32>)
RET_ReallyLR implicit $q0
...
---
name: uzp2_v4s32
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $q0, $q1

    ; The mask (1, 3, 5, 7) satisfies Mask[i] == 2 * i + 1 for every i, so the
    ; shuffle should be rewritten to a G_UZP2 of the two inputs.
    ;
    ; The shuffle result gets its own virtual register (%2) so that the
    ; second source operand is the COPY of $q1 rather than the shuffle's own
    ; result.
    ; CHECK-LABEL: name: uzp2_v4s32
    ; CHECK: liveins: $q0, $q1
    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
    ; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[COPY1]]
    ; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %1:_(<4 x s32>) = COPY $q1
    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, 5, 7)
    $q0 = COPY %2(<4 x s32>)
    RET_ReallyLR implicit $q0
...
---
name: no_uzp1
legalized: true
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $q0, $q1
; See isUZPMask: Mask[1] != 2 * i + 0
; (Mask[1] is 1, but a uzp1 mask needs 2 at index 1), so the
; G_SHUFFLE_VECTOR must be left unchanged.
; CHECK-LABEL: name: no_uzp1
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, 6)
; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
; CHECK: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 1, 4, 6)
$q0 = COPY %2(<4 x s32>)
RET_ReallyLR implicit $q0
...
---
name: no_uzp2
legalized: true
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $q0, $q1
; See isUZPMask: Mask[1] != 2 * i + 1
; (Mask[1] is 4, but a uzp2 mask needs 3 at index 1), so the
; G_SHUFFLE_VECTOR must be left unchanged.
; CHECK-LABEL: name: no_uzp2
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(1, 4, 5, 7)
; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
; CHECK: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 4, 5, 7)
$q0 = COPY %2(<4 x s32>)
RET_ReallyLR implicit $q0
...
---
name: uzp1_undef
legalized: true
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $q0, $q1
; Make sure that we can still produce a uzp1/uzp2 with undef indices.
; Here index 1 is undef (-1); the remaining indices 0, 4, 6 still satisfy
; Mask[i] == 2 * i + 0, so a G_UZP1 is expected.
; CHECK-LABEL: name: uzp1_undef
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[COPY]], [[COPY1]]
; CHECK: $q0 = COPY [[UZP1_]](<4 x s32>)
; CHECK: RET_ReallyLR implicit $q0
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, -1, 4, 6)
$q0 = COPY %2(<4 x s32>)
RET_ReallyLR implicit $q0
...
---
name: uzp2_undef
legalized: true
tracksRegLiveness: true
body: |
  bb.1.entry:
    liveins: $q0, $q1

    ; Make sure that we can still produce a uzp1/uzp2 with undef indices.
    ; Here index 2 is undef (-1); the remaining indices 1, 3, 7 still satisfy
    ; Mask[i] == 2 * i + 1, so a G_UZP2 is expected.
    ;
    ; The shuffle result gets its own virtual register (%2) so that the
    ; second source operand is the COPY of $q1 rather than the shuffle's own
    ; result.
    ; CHECK-LABEL: name: uzp2_undef
    ; CHECK: liveins: $q0, $q1
    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
    ; CHECK: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[COPY1]]
    ; CHECK: $q0 = COPY [[UZP2_]](<4 x s32>)
    ; CHECK: RET_ReallyLR implicit $q0
    %0:_(<4 x s32>) = COPY $q0
    %1:_(<4 x s32>) = COPY $q1
    %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 3, -1, 7)
    $q0 = COPY %2(<4 x s32>)
    RET_ReallyLR implicit $q0

View File

@ -0,0 +1,53 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# Check that we can select G_UZP1 and G_UZP2 via the tablegen importer.
#
# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
...
---
name: uzp1_v4s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $q0, $q1
; A G_UZP1 on <4 x s32> values in the fpr bank should be selected to
; UZP1v4i32 operating on fpr128 registers.
; CHECK-LABEL: name: uzp1_v4s32
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
; CHECK: [[UZP1v4i32_:%[0-9]+]]:fpr128 = UZP1v4i32 [[COPY]], [[COPY1]]
; CHECK: $q0 = COPY [[UZP1v4i32_]]
; CHECK: RET_ReallyLR implicit $q0
%0:fpr(<4 x s32>) = COPY $q0
%1:fpr(<4 x s32>) = COPY $q1
%2:fpr(<4 x s32>) = G_UZP1 %0, %1
$q0 = COPY %2(<4 x s32>)
RET_ReallyLR implicit $q0
...
---
name: uzp2_v4s32
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $q0, $q1
; A G_UZP2 on <4 x s32> values in the fpr bank should be selected to
; UZP2v4i32 operating on fpr128 registers.
; CHECK-LABEL: name: uzp2_v4s32
; CHECK: liveins: $q0, $q1
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
; CHECK: [[UZP2v4i32_:%[0-9]+]]:fpr128 = UZP2v4i32 [[COPY]], [[COPY1]]
; CHECK: $q0 = COPY [[UZP2v4i32_]]
; CHECK: RET_ReallyLR implicit $q0
%0:fpr(<4 x s32>) = COPY $q0
%1:fpr(<4 x s32>) = COPY $q1
%2:fpr(<4 x s32>) = G_UZP2 %0, %1
$q0 = COPY %2(<4 x s32>)
RET_ReallyLR implicit $q0
...