From 1c2cdfcee34cec627d5c46a44336d20296b23aa4 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Mon, 26 Jul 2021 11:25:25 -0700 Subject: [PATCH] [GlobalISel] Add scalar widening for G_MERGE_VALUES destination This adds support for the case where WideSize = DstSize + K * SrcSize In this case, we can pad the G_MERGE_VALUES instruction with K extra undef values with width SrcSize. Then the destination can be handled via widenScalarDst. Differential Revision: https://reviews.llvm.org/D106814 --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 6 +- lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 44 +++++- .../GlobalISel/legalize-merge-values.mir | 126 +++++++++++++++++- 3 files changed, 168 insertions(+), 8 deletions(-) diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 67141f3a632..7a970b99101 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -165,8 +165,10 @@ public: Register widenWithUnmerge(LLT WideTy, Register OrigReg); private: - LegalizeResult - widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); + LegalizeResult widenScalarSrcMergeValues(MachineInstr &MI, LLT WideTy); + LegalizeResult widenScalarDstMergeValues(MachineInstr &MI, LLT WideTy); + LegalizeResult widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy); LegalizeResult widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); LegalizeResult diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 50f0afbf6af..8286f4f95be 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1418,11 +1418,7 @@ void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) { } LegalizerHelper::LegalizeResult -LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy) { - if 
(TypeIdx != 1) - return UnableToLegalize; - +LegalizerHelper::widenScalarSrcMergeValues(MachineInstr &MI, LLT WideTy) { Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); if (DstTy.isVector()) @@ -1539,6 +1535,44 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarDstMergeValues(MachineInstr &MI, LLT WideTy) { + // Disallow for vectors and pointers. Not sure about what to do with pointers. + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + if (!DstTy.isScalar()) + return UnableToLegalize; + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + const int DstSize = DstTy.getSizeInBits(); + const int SrcSize = SrcTy.getSizeInBits(); + const int WideSize = WideTy.getSizeInBits(); + // If WideSize = DstSize + K * SrcSize then we can get WideSize by padding + // with K undef elements. + // + // dst = G_MERGE_VALUES elt1, elt2, ..., eltN + // -> wide_dst = G_MERGE_VALUES elt1, elt2, ... eltN, pad1, pad2, ... 
padK + int Difference = WideSize - DstSize; + if ((Difference) % SrcSize != 0) + return UnableToLegalize; + int NumPadEltsToAdd = Difference / SrcSize; + assert(NumPadEltsToAdd && "Expected to add at least one element?"); + MachineFunction &MF = *MI.getMF(); + for (int I = 0; I < NumPadEltsToAdd; ++I) { + auto PadElt = MIRBuilder.buildUndef(SrcTy); + MI.addOperand( + MF, MachineOperand::CreateReg(PadElt.getReg(0), /*isDef = */ false)); + } + widenScalarDst(MI, WideTy, 0); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx == 0) + return widenScalarDstMergeValues(MI, WideTy); + return widenScalarSrcMergeValues(MI, WideTy); +} + Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) { Register WideReg = MRI.createGenericVirtualRegister(WideTy); LLT OrigTy = MRI.getType(OrigReg); diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir b/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir index a802baca4c8..31e9734ef39 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=aarch64 -O0 -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -march=aarch64 -O0 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --- name: test_merge_s4 @@ -26,3 +26,127 @@ body: | %4:_(s64) = G_ANYEXT %3 $x0 = COPY %4 ... +--- +name: test_merge_s24_s8 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; 24 + 8 = 32 => Add 1 undef element as padding. 
+ + ; CHECK-LABEL: name: test_merge_s24_s8 + ; CHECK: liveins: $w0 + ; CHECK: %x:_(s8) = G_CONSTANT i8 0 + ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES %x(s8), %x(s8), %x(s8), [[DEF]](s8) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[MV]](s32) + ; CHECK: %zext:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK: $w0 = COPY %zext(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s8) = G_CONSTANT i8 0 + %merge:_(s24) = G_MERGE_VALUES %x, %x, %x + %zext:_(s32) = G_ZEXT %merge + $w0 = COPY %zext + RET_ReallyLR implicit $w0 +... +--- +name: test_merge_s40_s8 +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; 64 - 40 = 24; 24 / 8 = 3 => Add 3 undef elements as padding. + + ; CHECK-LABEL: name: test_merge_s40_s8 + ; CHECK: liveins: $x0 + ; CHECK: %x:_(s8) = G_CONSTANT i8 0 + ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES %x(s8), %x(s8), %x(s8), %x(s8), %x(s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[MV]](s64) + ; CHECK: %zext:_(s64) = G_AND [[COPY]], [[C]] + ; CHECK: $x0 = COPY %zext(s64) + ; CHECK: RET_ReallyLR implicit $x0 + %x:_(s8) = G_CONSTANT i8 0 + %merge:_(s40) = G_MERGE_VALUES %x, %x, %x, %x, %x + %zext:_(s64) = G_ZEXT %merge + $x0 = COPY %zext + RET_ReallyLR implicit $x0 +... +--- +name: test_merge_s7_s1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; 7 + 1 = 8 -> Add one undef. 
+ + ; CHECK-LABEL: name: test_merge_s7_s1 + ; CHECK: liveins: $w0 + ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; CHECK: %x:_(s1) = G_TRUNC [[C]](s8) + ; CHECK: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s8) = G_MERGE_VALUES %x(s1), %x(s1), %x(s1), %x(s1), %x(s1), %x(s1), %x(s1), [[DEF]](s1) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s8) + ; CHECK: %zext:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK: $w0 = COPY %zext(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s1) = G_CONSTANT i1 0 + %merge:_(s7) = G_MERGE_VALUES %x, %x, %x, %x, %x, %x, %x + %zext:_(s32) = G_ZEXT %merge + $w0 = COPY %zext + RET_ReallyLR implicit $w0 +... +--- +name: test_merge_s21_s7 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; Can't legalize this one yet. + + ; CHECK-LABEL: name: test_merge_s21_s7 + ; CHECK: liveins: $w0 + ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; CHECK: %x:_(s7) = G_TRUNC [[C]](s8) + ; CHECK: %merge:_(s21) = G_MERGE_VALUES %x(s7), %x(s7), %x(s7) + ; CHECK: %zext:_(s32) = G_ZEXT %merge(s21) + ; CHECK: $w0 = COPY %zext(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s7) = G_CONSTANT i7 0 + %merge:_(s21) = G_MERGE_VALUES %x, %x, %x + %zext:_(s32) = G_ZEXT %merge + $w0 = COPY %zext + RET_ReallyLR implicit $w0 +... +--- +name: test_merge_s11_s1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + ; 11 + 5 = 16 -> Add five undefs.
+ + ; CHECK-LABEL: name: test_merge_s11_s1 + ; CHECK: liveins: $w0 + ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 + ; CHECK: %x:_(s1) = G_TRUNC [[C]](s8) + ; CHECK: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES %x(s1), %x(s1), %x(s1), %x(s1), %x(s1), %x(s1), %x(s1), %x(s1), %x(s1), %x(s1), %x(s1), [[DEF]](s1), [[DEF]](s1), [[DEF]](s1), [[DEF]](s1), [[DEF]](s1) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2047 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; CHECK: %zext:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK: $w0 = COPY %zext(s32) + ; CHECK: RET_ReallyLR implicit $w0 + %x:_(s1) = G_CONSTANT i1 0 + %merge:_(s11) = G_MERGE_VALUES %x, %x, %x, %x, %x, %x, %x, %x, %x, %x, %x + %zext:_(s32) = G_ZEXT %merge + $w0 = COPY %zext + RET_ReallyLR implicit $w0