From 3bda78713bdb9dd2a85f1e00bd4c1c902f3462e6 Mon Sep 17 00:00:00 2001
From: Jessica Paquette
Date: Mon, 8 Jun 2020 14:02:15 -0700
Subject: [PATCH] [AArch64][GlobalISel] Add G_EXT and select ext using it

Add selection support for ext via a new opcode, G_EXT, and a post-legalizer
combine which matches it.

Add an `applyEXT` function, because the AArch64ext patterns require a register
for the immediate. So, we have to create a G_CONSTANT to get these without
writing new patterns or modifying the existing ones.

Tests are the same as arm64-ext.ll.

Also prevent ext from firing on the zip test. It has higher priority, so we
don't want it potentially getting in the way of mask tests.

Also fix up the shuffle-splat test, because ext is now selected there. The
test was incorrectly regbank selected before, which could cause a verifier
failure when you emit copies.

Differential Revision: https://reviews.llvm.org/D81436
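
Background for readers without the ISA reference at hand: ext extracts a
contiguous byte window from the concatenation of its two source registers.
The standalone model below is illustrative only and not part of the patch
(the function name is invented); it shows the 64-bit form that EXTv8i8
implements.

#include <array>
#include <cstdint>

// Reference model of the 64-bit ext: the result is bytes [Imm, Imm + 8)
// of the 16-byte concatenation V1:V2, where V1 supplies the low bytes.
static std::array<uint8_t, 8> extv8i8Model(const std::array<uint8_t, 8> &V1,
                                           const std::array<uint8_t, 8> &V2,
                                           unsigned Imm) {
  std::array<uint8_t, 8> Out{};
  for (unsigned I = 0; I != 8; ++I) {
    unsigned Src = Imm + I; // byte index into the concatenation
    Out[I] = Src < 8 ? V1[Src] : V2[Src - 8];
  }
  return Out;
}

// With Imm == 3 this is exactly shufflemask(3, 4, 5, 6, 7, 8, 9, 10) on two
// <8 x s8> sources, the first case in the new combiner test below.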
---
 lib/Target/AArch64/AArch64Combine.td          |   9 +-
 lib/Target/AArch64/AArch64InstrGISel.td       |   8 +
 .../GISel/AArch64PostLegalizerCombiner.cpp    |  77 ++++++
 .../GlobalISel/postlegalizer-combiner-ext.mir | 258 ++++++++++++++++++
 .../postlegalizer-combiner-shuffle-splat.mir  | 214 +++++++--------
 .../GlobalISel/postlegalizer-combiner-zip.mir |   2 +-
 .../CodeGen/AArch64/GlobalISel/select-ext.mir | 153 +++++++++++
 7 files changed, 606 insertions(+), 115 deletions(-)
 create mode 100644 test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-ext.mir
 create mode 100644 test/CodeGen/AArch64/GlobalISel/select-ext.mir

diff --git a/lib/Target/AArch64/AArch64Combine.td b/lib/Target/AArch64/AArch64Combine.td
index f45a3b560cf..072008b07ce 100644
--- a/lib/Target/AArch64/AArch64Combine.td
+++ b/lib/Target/AArch64/AArch64Combine.td
@@ -63,9 +63,16 @@ def trn : GICombineRule<
   (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
 >;
 
+def ext: GICombineRule <
+  (defs root:$root, shuffle_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
+         [{ return matchEXT(*${root}, MRI, ${matchinfo}); }]),
+  (apply [{ applyEXT(*${root}, ${matchinfo}); }])
+>;
+
 // Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
 // instruction.
-def shuffle_vector_pseudos : GICombineGroup<[dup, rev, zip, uzp, trn]>;
+def shuffle_vector_pseudos : GICombineGroup<[dup, rev, ext, zip, uzp, trn]>;
 
 def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
diff --git a/lib/Target/AArch64/AArch64InstrGISel.td b/lib/Target/AArch64/AArch64InstrGISel.td
index 6b7754d60de..a0e7c782f68 100644
--- a/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/lib/Target/AArch64/AArch64InstrGISel.td
@@ -104,6 +104,13 @@ def G_TRN2 : AArch64GenericInstruction {
   let hasSideEffects = 0;
 }
 
+// Represents an ext instruction. Produced post-legalization from
+// G_SHUFFLE_VECTORs with appropriate masks.
+def G_EXT: AArch64GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$v1, type0:$v2, untyped_imm_0:$imm);
+}
+
 def : GINodeEquiv<G_REV16, AArch64rev16>;
 def : GINodeEquiv<G_REV32, AArch64rev32>;
 def : GINodeEquiv<G_REV64, AArch64rev64>;
@@ -114,3 +121,4 @@ def : GINodeEquiv<G_TRN1, AArch64trn1>;
 def : GINodeEquiv<G_TRN2, AArch64trn2>;
 def : GINodeEquiv<G_UZP1, AArch64uzp1>;
 def : GINodeEquiv<G_UZP2, AArch64uzp2>;
+def : GINodeEquiv<G_EXT, AArch64ext>;
diff --git a/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index a2cad2fad4c..3217068c4a6 100644
--- a/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -111,6 +111,48 @@ static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
   return true;
 }
 
+/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
+/// sources of the shuffle are different.
+static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
+                                                      unsigned NumElts) {
+  // Look for the first non-undef element.
+  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
+  if (FirstRealElt == M.end())
+    return None;
+
+  // Use APInt to handle overflow when calculating expected element.
+  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
+  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
+
+  // The following shuffle indices must be the successive elements after the
+  // first real element.
+  if (any_of(
+          make_range(std::next(FirstRealElt), M.end()),
+          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
+    return None;
+
+  // The index of an EXT is the first element if it is not UNDEF.
+  // Watch out for the beginning UNDEFs. The EXT index should be the expected
+  // value of the first element. E.g.
+  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
+  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
+  // ExpectedElt is the last mask index plus 1.
+  uint64_t Imm = ExpectedElt.getZExtValue();
+  bool ReverseExt = false;
+
+  // There are two cases which require reversing the input vectors.
+  // For example, for vector <4 x i32> we have the following cases,
+  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
+  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
+  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
+  // reversing the two input vectors.
+  if (Imm < NumElts)
+    ReverseExt = true;
+  else
+    Imm -= NumElts;
+  return std::make_pair(ReverseExt, Imm);
+}
+
 /// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
 /// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
 static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
@@ -271,6 +313,27 @@ static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
   return true;
 }
 
+static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
+                     ShuffleVectorPseudo &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
+  Register Dst = MI.getOperand(0).getReg();
+  auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
+                            MRI.getType(Dst).getNumElements());
+  if (!ExtInfo)
+    return false;
+  bool ReverseExt;
+  uint64_t Imm;
+  std::tie(ReverseExt, Imm) = *ExtInfo;
+  Register V1 = MI.getOperand(1).getReg();
+  Register V2 = MI.getOperand(2).getReg();
+  if (ReverseExt)
+    std::swap(V1, V2);
+  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
+  Imm *= ExtFactor;
+  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
+  return true;
+}
+
 /// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
 /// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
 static bool applyShuffleVectorPseudo(MachineInstr &MI,
@@ -281,6 +344,20 @@ static bool applyShuffleVectorPseudo(MachineInstr &MI,
   return true;
 }
 
+/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
+/// Special-cased because the constant operand must be emitted as a G_CONSTANT
+/// for the imported tablegen patterns to work.
+static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
+  MachineIRBuilder MIRBuilder(MI);
+  // Tablegen patterns expect an i32 G_CONSTANT as the final op.
+  auto Cst =
+      MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
+  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
+                        {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
+  MI.eraseFromParent();
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGICombiner.inc"
 #undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
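
To make the index arithmetic in getExtMask above concrete, here is an
illustrative standalone sketch (not part of the patch): the names are
invented, APInt is replaced by plain modular arithmetic (equivalent here
since NumElts is a power of two for these vector types), and the
None-returning mismatch check is reduced to an assert.

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Walk the mask from the first defined lane; every later lane must be the
// next index modulo 2 * NumElts (undef lanes match anything). The final
// expected value, "last mask index plus 1", is the start lane of the
// extracted window; a start below NumElts lands in the second source, so
// the two inputs must be swapped.
static std::pair<bool, uint64_t> extMaskInfo(const std::vector<int> &M,
                                             uint64_t NumElts) {
  size_t First = 0;
  while (M[First] < 0)
    ++First; // matchEXT requires at least one defined lane
  uint64_t Expected = (M[First] + 1) % (2 * NumElts);
  for (size_t I = First + 1; I != M.size(); ++I) {
    assert(M[I] < 0 || static_cast<uint64_t>(M[I]) == Expected);
    Expected = (Expected + 1) % (2 * NumElts);
  }
  bool ReverseExt = Expected < NumElts;
  uint64_t Imm = ReverseExt ? Expected : Expected - NumElts;
  return {ReverseExt, Imm};
}

// extMaskInfo({3, 4, 5, 6, 7, 8, 9, 10}, 8)   yields {false, 3}
// extMaskInfo({13, 14, 15, 0, 1, 2, 3, 4}, 8) yields {true, 5}
// matchEXT then scales Imm by the element size in bytes (ExtFactor).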
diff --git a/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-ext.mir b/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-ext.mir
new file mode 100644
index 00000000000..18f68af46c2
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-ext.mir
@@ -0,0 +1,258 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Check that we can combine a G_SHUFFLE_VECTOR into a G_EXT.
+
+...
+---
+name: v8s8_cst3
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: v8s8_cst3
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:_(<8 x s8>) = COPY $d0
+    ; CHECK: %v2:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: %shuf:_(<8 x s8>) = G_EXT %v1, %v2, [[C]](s32)
+    ; CHECK: $d0 = COPY %shuf(<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %v1:_(<8 x s8>) = COPY $d0
+    %v2:_(<8 x s8>) = COPY $d1
+    %shuf:_(<8 x s8>) = G_SHUFFLE_VECTOR %v1(<8 x s8>), %v2, shufflemask(3, 4, 5, 6, 7, 8, 9, 10)
+    $d0 = COPY %shuf(<8 x s8>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: v8s8_cst5
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: v8s8_cst5
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:_(<8 x s8>) = COPY $d0
+    ; CHECK: %v2:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: %shuf:_(<8 x s8>) = G_EXT %v2, %v1, [[C]](s32)
+    ; CHECK: $d0 = COPY %shuf(<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %v1:_(<8 x s8>) = COPY $d0
+    %v2:_(<8 x s8>) = COPY $d1
+    %shuf:_(<8 x s8>) = G_SHUFFLE_VECTOR %v1(<8 x s8>), %v2, shufflemask(13, 14, 15, 0, 1, 2, 3, 4)
+    $d0 = COPY %shuf(<8 x s8>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: v16s8_cst3
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: v16s8_cst3
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:_(<16 x s8>) = COPY $q0
+    ; CHECK: %v2:_(<16 x s8>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: %shuf:_(<16 x s8>) = G_EXT %v1, %v2, [[C]](s32)
+    ; CHECK: $q0 = COPY %shuf(<16 x s8>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %v1:_(<16 x s8>) = COPY $q0
+    %v2:_(<16 x s8>) = COPY $q1
+    %shuf:_(<16 x s8>) = G_SHUFFLE_VECTOR %v1(<16 x s8>), %v2, shufflemask(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)
+    $q0 = COPY %shuf(<16 x s8>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: v16s8_cst7
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: v16s8_cst7
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:_(<16 x s8>) = COPY $q0
+    ; CHECK: %v2:_(<16 x s8>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK: %shuf:_(<16 x s8>) = G_EXT %v2, %v1, [[C]](s32)
+    ; CHECK: $q0 = COPY %shuf(<16 x s8>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %v1:_(<16 x s8>) = COPY $q0
+    %v2:_(<16 x s8>) = COPY $q1
+    %shuf:_(<16 x s8>) = G_SHUFFLE_VECTOR %v1(<16 x s8>), %v2, shufflemask(23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6)
+    $q0 = COPY %shuf(<16 x s8>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: v4s16_cst6
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: v4s16_cst6
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:_(<4 x s16>) = COPY $d0
+    ; CHECK: %v2:_(<4 x s16>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK: %shuf:_(<4 x s16>) = G_EXT %v1, %v2, [[C]](s32)
+    ; CHECK: $d0 = COPY %shuf(<4 x s16>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %v1:_(<4 x s16>) = COPY $d0
+    %v2:_(<4 x s16>) = COPY $d1
+    %shuf:_(<4 x s16>) = G_SHUFFLE_VECTOR %v1(<4 x s16>), %v2, shufflemask(3, 4, 5, 6)
+    $d0 = COPY %shuf(<4 x s16>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: v4s32_cst12
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; CHECK-LABEL: name: v4s32_cst12
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:_(<4 x s32>) = COPY $q0
+    ; CHECK: %v2:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+    ; CHECK: %shuf:_(<4 x s32>) = G_EXT %v1, %v2, [[C]](s32)
+    ; CHECK: $q0 = COPY %shuf(<4 x s32>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %v1:_(<4 x s32>) = COPY $q0
+    %v2:_(<4 x s32>) = COPY $q1
+    %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %v1(<4 x s32>), %v2, shufflemask(3, 4, 5, 6)
+    $q0 = COPY %shuf(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: undef_elts_should_match_1
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; Undef shuffle indices should not prevent matching G_EXT.
+    ; We should get a constant 3 here.
+    ;
+    ; CHECK-LABEL: name: undef_elts_should_match_1
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:_(<8 x s8>) = COPY $d0
+    ; CHECK: %v2:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK: %shuf:_(<8 x s8>) = G_EXT %v1, %v2, [[C]](s32)
+    ; CHECK: $d0 = COPY %shuf(<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %v1:_(<8 x s8>) = COPY $d0
+    %v2:_(<8 x s8>) = COPY $d1
+    %shuf:_(<8 x s8>) = G_SHUFFLE_VECTOR %v1(<8 x s8>), %v2, shufflemask(3, -1, -1, 6, 7, 8, 9, 10)
+    $d0 = COPY %shuf(<8 x s8>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: undef_elts_should_match_2
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; Undef shuffle indices should not prevent matching G_EXT.
+    ; We should get a constant 6 here.
+    ;
+    ; CHECK-LABEL: name: undef_elts_should_match_2
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:_(<8 x s8>) = COPY $d0
+    ; CHECK: %v2:_(<8 x s8>) = COPY $d1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
+    ; CHECK: %shuf:_(<8 x s8>) = G_EXT %v2, %v1, [[C]](s32)
+    ; CHECK: $d0 = COPY %shuf(<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %v1:_(<8 x s8>) = COPY $d0
+    %v2:_(<8 x s8>) = COPY $d1
+    %shuf:_(<8 x s8>) = G_SHUFFLE_VECTOR %v1(<8 x s8>), %v2, shufflemask(-1, -1, -1, -1, 2, 3, 4, 5)
+    $d0 = COPY %shuf(<8 x s8>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: undef_elts_should_match_3
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; Undef shuffle indices should not prevent matching G_EXT.
+    ; We should get a constant 7 here.
+    ; CHECK-LABEL: name: undef_elts_should_match_3
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:_(<16 x s8>) = COPY $q0
+    ; CHECK: %v2:_(<16 x s8>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK: %shuf:_(<16 x s8>) = G_EXT %v2, %v1, [[C]](s32)
+    ; CHECK: $q0 = COPY %shuf(<16 x s8>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %v1:_(<16 x s8>) = COPY $q0
+    %v2:_(<16 x s8>) = COPY $q1
+    %shuf:_(<16 x s8>) = G_SHUFFLE_VECTOR %v1(<16 x s8>), %v2, shufflemask(23, 24, 25, 26, -1, -1, 29, 30, 31, 0, 1, 2, 3, 4, -1, 6)
+    $q0 = COPY %shuf(<16 x s8>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: undef_elts_should_match_4
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; Undef shuffle indices should not prevent matching G_EXT.
+    ; We should get a constant 10 here.
+    ; CHECK-LABEL: name: undef_elts_should_match_4
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:_(<8 x s16>) = COPY $q0
+    ; CHECK: %v2:_(<8 x s16>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+    ; CHECK: %shuf:_(<8 x s16>) = G_EXT %v2, %v1, [[C]](s32)
+    ; CHECK: $q0 = COPY %shuf(<8 x s16>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %v1:_(<8 x s16>) = COPY $q0
+    %v2:_(<8 x s16>) = COPY $q1
+    %shuf:_(<8 x s16>) = G_SHUFFLE_VECTOR %v1(<8 x s16>), %v2, shufflemask(-1, -1, -1, -1, 1, 2, 3, 4)
+    $q0 = COPY %shuf(<8 x s16>)
+    RET_ReallyLR implicit $q0
+...
+---
+name: all_undef
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+    ; We expect at least one defined element in the shuffle mask.
+    ;
+    ; CHECK-LABEL: name: all_undef
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:_(<8 x s16>) = COPY $q0
+    ; CHECK: %shuf:_(<8 x s16>) = G_REV64 %v1
+    ; CHECK: $q0 = COPY %shuf(<8 x s16>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %v1:_(<8 x s16>) = COPY $q0
+    %v2:_(<8 x s16>) = COPY $q1
+    %shuf:_(<8 x s16>) = G_SHUFFLE_VECTOR %v1(<8 x s16>), %v2, shufflemask(-1, -1, -1, -1, -1, -1, -1, -1)
+    $q0 = COPY %shuf(<8 x s16>)
+    RET_ReallyLR implicit $q0
+...
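
A subtlety visible in all_undef above: matchEXT refuses a fully-undef mask,
yet the shuffle still combines, into G_REV64, because rev is listed before
ext in shuffle_vector_pseudos and its mask predicate, like the other
undef-tolerant checks in this file, skips undefined lanes. A sketch of that
shape, modeled on the in-tree isREVMask (illustrative, not part of the
patch):

#include <vector>

// Undef lanes (< 0) are skipped, so an all-undef mask passes every lane
// test vacuously and the first undef-tolerant combine in the group wins.
static bool revLikeMaskMatches(const std::vector<int> &M, size_t BlockElts) {
  for (size_t I = 0, E = M.size(); I != E; ++I) {
    if (M[I] < 0)
      continue; // undef lane matches anything
    size_t Want = (I - I % BlockElts) + (BlockElts - 1 - I % BlockElts);
    if (static_cast<size_t>(M[I]) != Want)
      return false;
  }
  return true;
}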
diff --git a/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir b/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
index 2ff208b0f6f..2325b0fb034 100644
--- a/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
+++ b/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-shuffle-splat.mir
@@ -5,7 +5,6 @@
 name: splat_4xi32
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
@@ -13,15 +12,15 @@
 
     ; CHECK-LABEL: name: splat_4xi32
     ; CHECK: liveins: $w0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[COPY]](s32)
     ; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:gpr(s32) = COPY $w0
-    %2:fpr(<4 x s32>) = G_IMPLICIT_DEF
-    %3:gpr(s32) = G_CONSTANT i32 0
-    %1:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
-    %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
+    %0:_(s32) = COPY $w0
+    %2:_(<4 x s32>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
     $q0 = COPY %4(<4 x s32>)
     RET_ReallyLR implicit $q0
 
@@ -30,7 +29,6 @@
 name: splat_2xi64
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
@@ -38,15 +36,15 @@
 
     ; CHECK-LABEL: name: splat_2xi64
     ; CHECK: liveins: $x0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[DUP:%[0-9]+]]:_(<2 x s64>) = G_DUP [[COPY]](s64)
     ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:gpr(s64) = COPY $x0
-    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
-    %3:gpr(s32) = G_CONSTANT i32 0
-    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
-    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0)
+    %0:_(s64) = COPY $x0
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
+    %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0)
     $q0 = COPY %4(<2 x s64>)
     RET_ReallyLR implicit $q0
 
@@ -55,7 +53,6 @@
 name: splat_2xi32
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
@@ -63,15 +60,15 @@
 
     ; CHECK-LABEL: name: splat_2xi32
     ; CHECK: liveins: $w0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK: [[DUP:%[0-9]+]]:_(<2 x s32>) = G_DUP [[COPY]](s32)
     ; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
     ; CHECK: RET_ReallyLR implicit $d0
-    %0:gpr(s32) = COPY $w0
-    %2:fpr(<2 x s32>) = G_IMPLICIT_DEF
-    %3:gpr(s32) = G_CONSTANT i32 0
-    %1:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
-    %4:fpr(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0)
+    %0:_(s32) = COPY $w0
+    %2:_(<2 x s32>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    %4:_(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0)
     $d0 = COPY %4(<2 x s32>)
     RET_ReallyLR implicit $d0
 
@@ -80,7 +77,6 @@
 name: splat_4xf32
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
@@ -88,15 +84,15 @@
 
     ; CHECK-LABEL: name: splat_4xf32
    ; CHECK: liveins: $s0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[COPY]](s32)
     ; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(s32) = COPY $s0
-    %2:fpr(<4 x s32>) = G_IMPLICIT_DEF
-    %3:gpr(s32) = G_CONSTANT i32 0
-    %1:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
-    %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
+    %0:_(s32) = COPY $s0
+    %2:_(<4 x s32>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0)
     $q0 = COPY %4(<4 x s32>)
     RET_ReallyLR implicit $q0
 
@@ -105,7 +101,6 @@
 name: splat_2xf64
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
@@ -113,15 +108,15 @@
 
     ; CHECK-LABEL: name: splat_2xf64
     ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+    ; CHECK: [[DUP:%[0-9]+]]:_(<2 x s64>) = G_DUP [[COPY]](s64)
     ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(s64) = COPY $d0
-    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
-    %3:gpr(s32) = G_CONSTANT i32 0
-    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
-    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0)
+    %0:_(s64) = COPY $d0
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
+    %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0)
     $q0 = COPY %4(<2 x s64>)
     RET_ReallyLR implicit $q0
 
@@ -130,7 +125,6 @@
 name: splat_2xf32
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
@@ -138,15 +132,15 @@
 
     ; CHECK-LABEL: name: splat_2xf32
     ; CHECK: liveins: $s0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK: [[DUP:%[0-9]+]]:_(<2 x s32>) = G_DUP [[COPY]](s32)
     ; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
     ; CHECK: RET_ReallyLR implicit $d0
-    %0:fpr(s32) = COPY $s0
-    %2:fpr(<2 x s32>) = G_IMPLICIT_DEF
-    %3:gpr(s32) = G_CONSTANT i32 0
-    %1:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
-    %4:fpr(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0)
+    %0:_(s32) = COPY $s0
+    %2:_(<2 x s32>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    %4:_(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0)
     $d0 = COPY %4(<2 x s32>)
     RET_ReallyLR implicit $d0
 
@@ -155,7 +149,6 @@
 name: splat_2xf64_copies
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
@@ -165,17 +158,17 @@
 
     ; These copies shouldn't get in the way of matching the dup pattern.
     ; CHECK-LABEL: name: splat_2xf64_copies
     ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+    ; CHECK: [[DUP:%[0-9]+]]:_(<2 x s64>) = G_DUP [[COPY]](s64)
     ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(s64) = COPY $d0
-    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
-    %6:fpr(<2 x s64>) = COPY %2
-    %3:gpr(s32) = G_CONSTANT i32 0
-    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %6, %0(s64), %3(s32)
-    %7:fpr(<2 x s64>) = COPY %1
-    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %7(<2 x s64>), %2, shufflemask(0, 0)
+    %0:_(s64) = COPY $d0
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %6:_(<2 x s64>) = COPY %2
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %6, %0(s64), %3(s32)
+    %7:_(<2 x s64>) = COPY %1
+    %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %7(<2 x s64>), %2, shufflemask(0, 0)
     $q0 = COPY %4(<2 x s64>)
     RET_ReallyLR implicit $q0
 
@@ -184,7 +177,6 @@
 name: not_all_zeros
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
@@ -192,18 +184,19 @@
 
     ; Make sure that we don't do the optimization when it's not all zeroes.
     ; CHECK-LABEL: name: not_all_zeros
     ; CHECK: liveins: $x0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
-    ; CHECK: [[DEF:%[0-9]+]]:fpr(<2 x s64>) = G_IMPLICIT_DEF
-    ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[IVEC:%[0-9]+]]:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32)
-    ; CHECK: [[SHUF:%[0-9]+]]:fpr(<2 x s64>) = G_SHUFFLE_VECTOR [[IVEC]](<2 x s64>), [[DEF]], shufflemask(0, 1)
-    ; CHECK: $q0 = COPY [[SHUF]](<2 x s64>)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[EXT:%[0-9]+]]:_(<2 x s64>) = G_EXT [[IVEC]], [[DEF]], [[C1]](s32)
+    ; CHECK: $q0 = COPY [[EXT]](<2 x s64>)
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:gpr(s64) = COPY $x0
-    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
-    %3:gpr(s32) = G_CONSTANT i32 0
-    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
-    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 1)
+    %0:_(s64) = COPY $x0
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
+    %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 1)
     $q0 = COPY %4(<2 x s64>)
     RET_ReallyLR implicit $q0
 
@@ -212,7 +205,6 @@
 name: all_undef
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
@@ -224,15 +216,15 @@
     ;
     ; CHECK-LABEL: name: all_undef
     ; CHECK: liveins: $x0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[DUP:%[0-9]+]]:_(<2 x s64>) = G_DUP [[COPY]](s64)
     ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:gpr(s64) = COPY $x0
-    %2:fpr(<2 x s64>) = G_IMPLICIT_DEF
-    %3:gpr(s32) = G_CONSTANT i32 0
-    %1:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
-    %4:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(-1, -1)
+    %0:_(s64) = COPY $x0
+    %2:_(<2 x s64>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32)
+    %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(-1, -1)
     $q0 = COPY %4(<2 x s64>)
     RET_ReallyLR implicit $q0
 
@@ -241,7 +233,6 @@
 name: one_undef
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
@@ -252,15 +243,15 @@
     ;
     ; CHECK-LABEL: name: one_undef
     ; CHECK: liveins: $s0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
-    ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK: [[DUP:%[0-9]+]]:_(<4 x s32>) = G_DUP [[COPY]](s32)
     ; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(s32) = COPY $s0
-    %2:fpr(<4 x s32>) = G_IMPLICIT_DEF
-    %3:gpr(s32) = G_CONSTANT i32 0
-    %1:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
-    %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, -1, 0, 0)
+    %0:_(s32) = COPY $s0
+    %2:_(<4 x s32>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, -1, 0, 0)
     $q0 = COPY %4(<4 x s32>)
     RET_ReallyLR implicit $q0
 
@@ -269,7 +260,6 @@
 name: not_all_zeros_with_undefs
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
@@ -278,18 +268,18 @@
     ;
     ; CHECK-LABEL: name: not_all_zeros_with_undefs
     ; CHECK: liveins: $s0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
-    ; CHECK: [[DEF:%[0-9]+]]:fpr(<4 x s32>) = G_IMPLICIT_DEF
-    ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
-    ; CHECK: [[IVEC:%[0-9]+]]:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
-    ; CHECK: [[SHUF:%[0-9]+]]:fpr(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 3)
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32)
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 3)
     ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>)
     ; CHECK: RET_ReallyLR implicit $q0
-    %0:fpr(s32) = COPY $s0
-    %2:fpr(<4 x s32>) = G_IMPLICIT_DEF
-    %3:gpr(s32) = G_CONSTANT i32 0
-    %1:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
-    %4:fpr(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(-1, 0, 0, 3)
+    %0:_(s32) = COPY $s0
+    %2:_(<4 x s32>) = G_IMPLICIT_DEF
+    %3:_(s32) = G_CONSTANT i32 0
+    %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32)
+    %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(-1, 0, 0, 3)
     $q0 = COPY %4(<4 x s32>)
     RET_ReallyLR implicit $q0
 
@@ -298,22 +288,21 @@
 name: splat_4xi16
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
     liveins: $h0
     ; CHECK-LABEL: name: splat_4xi16
     ; CHECK: liveins: $h0
-    ; CHECK: %copy:fpr(s16) = COPY $h0
-    ; CHECK: %splat:fpr(<4 x s16>) = G_DUP %copy(s16)
+    ; CHECK: %copy:_(s16) = COPY $h0
+    ; CHECK: %splat:_(<4 x s16>) = G_DUP %copy(s16)
     ; CHECK: $d0 = COPY %splat(<4 x s16>)
     ; CHECK: RET_ReallyLR implicit $d0
-    %copy:fpr(s16) = COPY $h0
-    %undef:fpr(<4 x s16>) = G_IMPLICIT_DEF
-    %cst:gpr(s32) = G_CONSTANT i32 0
-    %ins:fpr(<4 x s16>) = G_INSERT_VECTOR_ELT %undef, %copy(s16), %cst(s32)
-    %splat:fpr(<4 x s16>) = G_SHUFFLE_VECTOR %ins(<4 x s16>), %undef, shufflemask(0, 0, 0, 0)
+    %copy:_(s16) = COPY $h0
+    %undef:_(<4 x s16>) = G_IMPLICIT_DEF
+    %cst:_(s32) = G_CONSTANT i32 0
+    %ins:_(<4 x s16>) = G_INSERT_VECTOR_ELT %undef, %copy(s16), %cst(s32)
+    %splat:_(<4 x s16>) = G_SHUFFLE_VECTOR %ins(<4 x s16>), %undef, shufflemask(0, 0, 0, 0)
     $d0 = COPY %splat(<4 x s16>)
     RET_ReallyLR implicit $d0
 
@@ -322,21 +311,20 @@
 name: splat_8xi8
 alignment: 4
 legalized: true
-regBankSelected: true
 tracksRegLiveness: true
 body: |
   bb.1.entry:
     liveins: $w0
     ; CHECK-LABEL: name: splat_8xi8
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr(s32) = COPY $w0
-    ; CHECK: %splat:fpr(<8 x s8>) = G_DUP %copy(s32)
+    ; CHECK: %copy:_(s32) = COPY $w0
+    ; CHECK: %splat:_(<8 x s8>) = G_DUP %copy(s32)
     ; CHECK: $d0 = COPY %splat(<8 x s8>)
     ; CHECK: RET_ReallyLR implicit $d0
-    %copy:gpr(s32) = COPY $w0
-    %undef:fpr(<8 x s8>) = G_IMPLICIT_DEF
-    %cst:gpr(s32) = G_CONSTANT i32 0
-    %ins:fpr(<8 x s8>) = G_INSERT_VECTOR_ELT %undef, %copy(s32), %cst(s32)
-    %splat:fpr(<8 x s8>) = G_SHUFFLE_VECTOR %ins(<8 x s8>), %undef, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
+    %copy:_(s32) = COPY $w0
+    %undef:_(<8 x s8>) = G_IMPLICIT_DEF
+    %cst:_(s32) = G_CONSTANT i32 0
+    %ins:_(<8 x s8>) = G_INSERT_VECTOR_ELT %undef, %copy(s32), %cst(s32)
+    %splat:_(<8 x s8>) = G_SHUFFLE_VECTOR %ins(<8 x s8>), %undef, shufflemask(0, 0, 0, 0, 0, 0, 0, 0)
     $d0 = COPY %splat(<8 x s8>)
     RET_ReallyLR implicit $d0
diff --git a/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-zip.mir b/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-zip.mir
index e754377e5f3..3d71b6a948d 100644
--- a/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-zip.mir
+++ b/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-zip.mir
@@ -3,7 +3,7 @@
 # Check that we can recognize a shuffle mask for a zip instruction, and produce
 # G_ZIP1 or G_ZIP2 where appropriate.
 #
-# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner --aarch64postlegalizercombinerhelper-disable-rule=ext -verify-machineinstrs %s -o - | FileCheck %s
 
 ...
 ---
diff --git a/test/CodeGen/AArch64/GlobalISel/select-ext.mir b/test/CodeGen/AArch64/GlobalISel/select-ext.mir
new file mode 100644
index 00000000000..c97ed4d5272
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/select-ext.mir
@@ -0,0 +1,153 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Test G_EXT selection using AArch64ext patterns.
+
+...
+---
+name: v8s8_EXTv8i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: v8s8_EXTv8i8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:fpr64 = COPY $d0
+    ; CHECK: %v2:fpr64 = COPY $d1
+    ; CHECK: %shuf:fpr64 = EXTv8i8 %v1, %v2, 3
+    %v1:fpr(<8 x s8>) = COPY $d0
+    %v2:fpr(<8 x s8>) = COPY $d1
+    %3:gpr(s32) = G_CONSTANT i32 3
+    %shuf:fpr(<8 x s8>) = G_EXT %v1, %v2, %3(s32)
+
+...
+---
+name: v16s8_EXTv16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: v16s8_EXTv16i8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:fpr128 = COPY $q0
+    ; CHECK: %v2:fpr128 = COPY $q1
+    ; CHECK: %shuf:fpr128 = EXTv16i8 %v1, %v2, 3
+    %v1:fpr(<16 x s8>) = COPY $q0
+    %v2:fpr(<16 x s8>) = COPY $q1
+    %3:gpr(s32) = G_CONSTANT i32 3
+    %shuf:fpr(<16 x s8>) = G_EXT %v1, %v2, %3(s32)
+
+...
+---
+name: v4s16_EXTv8i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: v4s16_EXTv8i8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:fpr64 = COPY $d0
+    ; CHECK: %v2:fpr64 = COPY $d1
+    ; CHECK: %shuf:fpr64 = EXTv8i8 %v1, %v2, 6
+    %v1:fpr(<4 x s16>) = COPY $d0
+    %v2:fpr(<4 x s16>) = COPY $d1
+    %3:gpr(s32) = G_CONSTANT i32 6
+    %shuf:fpr(<4 x s16>) = G_EXT %v1, %v2, %3(s32)
+
+...
+---
+name: v8s16_EXTv16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: v8s16_EXTv16i8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:fpr128 = COPY $q0
+    ; CHECK: %v2:fpr128 = COPY $q1
+    ; CHECK: %shuf:fpr128 = EXTv16i8 %v2, %v1, 10
+    %v1:fpr(<8 x s16>) = COPY $q0
+    %v2:fpr(<8 x s16>) = COPY $q1
+    %3:gpr(s32) = G_CONSTANT i32 10
+    %shuf:fpr(<8 x s16>) = G_EXT %v2, %v1, %3(s32)
+...
+
+...
+---
+name: v4s32_EXTv16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: v4s32_EXTv16i8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:fpr128 = COPY $q0
+    ; CHECK: %v2:fpr128 = COPY $q1
+    ; CHECK: %shuf:fpr128 = EXTv16i8 %v1, %v2, 12
+    %v1:fpr(<4 x s32>) = COPY $q0
+    %v2:fpr(<4 x s32>) = COPY $q1
+    %3:gpr(s32) = G_CONSTANT i32 12
+    %shuf:fpr(<4 x s32>) = G_EXT %v1, %v2, %3(s32)
+
+...
+---
+name: v2s32_EXTv8i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: v2s32_EXTv8i8
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %v1:fpr64 = COPY $d0
+    ; CHECK: %v2:fpr64 = COPY $d1
+    ; CHECK: %shuf:fpr64 = EXTv8i8 %v1, %v2, 2
+    %v1:fpr(<2 x s32>) = COPY $d0
+    %v2:fpr(<2 x s32>) = COPY $d1
+    %3:gpr(s32) = G_CONSTANT i32 2
+    %shuf:fpr(<2 x s32>) = G_EXT %v1, %v2, %3(s32)
+
+...
+---
+name: v2s64_EXTv16i8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: v2s64_EXTv16i8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: %v1:fpr128 = COPY $q0
+    ; CHECK: %v2:fpr128 = COPY $q1
+    ; CHECK: %shuf:fpr128 = EXTv16i8 %v1, %v2, 2
+    %v1:fpr(<2 x s64>) = COPY $q0
+    %v2:fpr(<2 x s64>) = COPY $q1
+    %3:gpr(s32) = G_CONSTANT i32 2
+    %shuf:fpr(<2 x s64>) = G_EXT %v1, %v2, %3(s32)
+...
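
As a closing cross-check (illustrative, not part of the patch): the
immediates produced by the combiner are byte offsets, i.e. the shuffle's
element index scaled by matchEXT's ExtFactor. The select tests above feed
G_EXT arbitrary byte immediates directly (e.g. 2 for <2 x s32>), since
selection only needs a valid byte index.

// EXT immediates are bytes: element index times ExtFactor (= bits / 8).
static_assert(3 * (8 / 8) == 3, "v8s8/v16s8: element 3 -> imm 3");
static_assert(3 * (16 / 8) == 6, "v4s16_cst6: element 3 -> imm 6");
static_assert(5 * (16 / 8) == 10, "undef_elts_should_match_4: element 5 -> imm 10");
static_assert(3 * (32 / 8) == 12, "v4s32_cst12: element 3 -> imm 12");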