From 69e5ee4faa2ac42e4013ab60c0e87cb4a845efd0 Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Mon, 26 Jul 2021 16:42:20 -0700 Subject: [PATCH] Revert "[AArch64][GlobalISel] Legalize ctpop s128" This reverts commit 97e95fea53fc403c2a12e356dc835fc922123575. It broke test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll. Not sure why I didn't see that. --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 10 +--- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 7 ++- .../AArch64/GlobalISel/legalize-ctpop.mir | 24 -------- test/CodeGen/AArch64/popcount.ll | 58 ++++++++----------- 4 files changed, 30 insertions(+), 69 deletions(-) diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 8286f4f95be..38b2b2363fa 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -5656,15 +5656,7 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0)); auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1)); - - LLT CountTy = LLT::scalar(Log2_64_Ceil(SrcTy.getSizeInBits())); - if (CountTy.getSizeInBits() < DstTy.getSizeInBits()) { - LoCTPOP = MIRBuilder.buildTrunc(CountTy, LoCTPOP); - HiCTPOP = MIRBuilder.buildTrunc(CountTy, HiCTPOP); - auto Add = MIRBuilder.buildAdd(CountTy, HiCTPOP, LoCTPOP); - MIRBuilder.buildZExt(DstReg, Add); - } else - MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP); + MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP); MI.eraseFromParent(); return Legalized; diff --git a/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index f7764676708..08e4a119127 100644 --- a/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -764,6 +764,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder({G_SBFX, G_UBFX}) .customFor({{s32, s32}, {s64, s64}}); + // TODO: Custom legalization for s128 // TODO: Use generic lowering when custom lowering is not possible. auto always = [=](const LegalityQuery &Q) { return true; }; getActionDefinitionsBuilder(G_CTPOP) @@ -774,7 +775,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .maxScalarEltSameAsIf(always, 1, 0) .customFor({{s32, s32}, {s64, s64}, - {s128, s128}, {v2s64, v2s64}, {v2s32, v2s32}, {v4s32, v4s32}, @@ -1151,7 +1151,8 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI, // v8s16,v4s32,v2s64 -> v16i8 LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8); if (Ty.isScalar()) { - assert((Size == 32 || Size == 64 || Size == 128) && "Expected only 32, 64, or 128 bit scalars!"); + // TODO: Handle s128. + assert((Size == 32 || Size == 64) && "Expected only 32 or 64 bit scalars!"); if (Size == 32) { Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0); } @@ -1197,7 +1198,7 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI, } // Post-conditioning. - if (Ty.isScalar() && (Size == 64 || Size == 128)) + if (Ty.isScalar() && Size == 64) MIRBuilder.buildZExt(Dst, UADD); else UADD->getOperand(0).setReg(Dst); diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir b/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir index 4748314f69f..04406c15296 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-ctpop.mir @@ -78,30 +78,6 @@ body: | $x0 = COPY %ctpop(s64) RET_ReallyLR implicit $x0 -... ---- -name: s128_lower -tracksRegLiveness: true -body: | - bb.0: - liveins: $q0 - ; CHECK-LABEL: name: s128_lower - ; CHECK: liveins: $q0 - ; CHECK: %copy:_(s128) = COPY $q0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST %copy(s128) - ; CHECK: [[CTPOP:%[0-9]+]]:_(<16 x s8>) = G_CTPOP [[BITCAST]](<16 x s8>) - ; CHECK: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[CTPOP]](<16 x s8>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[INT]](s32), [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: %ctpop:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) - ; CHECK: $q0 = COPY %ctpop(s128) - ; CHECK: RET_ReallyLR implicit $q0 - %copy:_(s128) = COPY $q0 - %ctpop:_(s128) = G_CTPOP %copy(s128) - $q0 = COPY %ctpop(s128) - RET_ReallyLR implicit $q0 - ... --- name: widen_s16 diff --git a/test/CodeGen/AArch64/popcount.ll b/test/CodeGen/AArch64/popcount.ll index 1681b86cdc2..2e5e988f057 100644 --- a/test/CodeGen/AArch64/popcount.ll +++ b/test/CodeGen/AArch64/popcount.ll @@ -5,12 +5,15 @@ define i8 @popcount128(i128* nocapture nonnull readonly %0) { ; CHECK-LABEL: popcount128: ; CHECK: // %bb.0: // %Entry -; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: uaddlv h1, v0.16b ; CHECK-NEXT: // implicit-def: $q0 ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret Entry: @@ -27,35 +30,27 @@ declare i128 @llvm.ctpop.i128(i128) define i16 @popcount256(i256* nocapture nonnull readonly %0) { ; CHECK-LABEL: popcount256: ; CHECK: // %bb.0: // %Entry -; CHECK-NEXT: ldr x11, [x0] -; CHECK-NEXT: ldr x10, [x0, #8] -; CHECK-NEXT: ldr x9, [x0, #16] -; CHECK-NEXT: ldr x8, [x0, #24] +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: ldr x9, [x0, #24] +; CHECK-NEXT: ldr d1, [x0, #16] ; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: mov v0.d[0], x11 -; CHECK-NEXT: mov v0.d[1], x10 -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.d[0], x9 -; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: cnt v0.16b, v0.16b -; CHECK-NEXT: uaddlv h2, v0.16b +; CHECK-NEXT: uaddlv h1, v0.16b ; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: cnt v1.16b, v1.16b -; CHECK-NEXT: uaddlv h2, v1.16b -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v2.16b -; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1 -; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: add w8, w8, w9 -; CHECK-NEXT: // implicit-def: $w9 -; CHECK-NEXT: // kill: def $x8 killed $w8 -; CHECK-NEXT: // kill: def $x9 killed $w9 -; CHECK-NEXT: bfi x8, x9, #32, #32 -; CHECK-NEXT: and x8, x8, #0xff -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: uaddlv h1, v0.16b +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w0, w8, w9 ; CHECK-NEXT: ret Entry: %1 = load i256, i256* %0, align 16 @@ -71,19 +66,16 @@ define <1 x i128> @popcount1x128(<1 x i128> %0) { ; CHECK-LABEL: popcount1x128: ; CHECK: // %bb.0: // %Entry ; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: mov v0.d[0], x0 +; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: uaddlv h1, v0.16b ; CHECK-NEXT: // implicit-def: $q0 ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: // kill: def $x0 killed $w0 -; CHECK-NEXT: // kill: def $x8 killed $w8 -; CHECK-NEXT: bfi x0, x8, #32, #32 -; CHECK-NEXT: mov x1, xzr +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: ret Entry: %1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0)