From 21d0e789430591b4e0ab3bff5d918570ecb3af61 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 15 Oct 2020 13:12:38 -0400 Subject: [PATCH] [CostModel] remove cost-kind predicate for ctlz/cttz intrinsics in basic TTI implementation The cost modeling for intrinsics is a patchwork based on different expectations from the callers, so it's a mess. I'm hoping to untangle this to allow canonicalization to the new min/max intrinsics in IR. The general goal is to remove the cost-kind restriction here in the basic implementation class. I.e., if some intrinsic has throughput cost of 104, assume that it has the same size, latency, and blended costs. Effectively, an intrinsic with cost N is composed of N simple instructions. If that's not correct, the target should provide a more accurate override. The x86-64 SSE2 subtarget cost diffs require explanation: 1. The scalar ctlz/cttz are assuming "BSR+XOR+CMOV" or "TEST+BSF+CMOV/BRANCH", so not cheap. 2. The 128-bit SSE vector width versions assume cost of 18 or 26 (no explanation provided in the tables, but this corresponds to a bunch of shift/logic/compare). 3. The 512-bit vectors in the test file are scaled up by a factor of 4 from the legal vector width costs. 4. The plain latency cost-kind is not affected in this patch because that calculation is diverted before we get to getIntrinsicInstrCost(). Differential Revision: https://reviews.llvm.org/D89461 --- include/llvm/CodeGen/BasicTTIImpl.h | 18 ++++++------------ .../CostModel/X86/intrinsic-cost-kinds.ll | 16 ++++++++-------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 742fded56a8..e3e167c318e 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -1150,21 +1150,15 @@ public: break; case Intrinsic::cttz: - // FIXME: all cost kinds should default to the same thing? 
- if (CostKind != TTI::TCK_RecipThroughput) { - if (getTLI()->isCheapToSpeculateCttz()) - return TargetTransformInfo::TCC_Basic; - return BaseT::getIntrinsicInstrCost(ICA, CostKind); - } + // FIXME: If necessary, this should go in target-specific overrides. + if (VF == 1 && RetVF == 1 && getTLI()->isCheapToSpeculateCttz()) + return TargetTransformInfo::TCC_Basic; break; case Intrinsic::ctlz: - // FIXME: all cost kinds should default to the same thing? - if (CostKind != TTI::TCK_RecipThroughput) { - if (getTLI()->isCheapToSpeculateCtlz()) - return TargetTransformInfo::TCC_Basic; - return BaseT::getIntrinsicInstrCost(ICA, CostKind); - } + // FIXME: If necessary, this should go in target-specific overrides. + if (VF == 1 && RetVF == 1 && getTLI()->isCheapToSpeculateCtlz()) + return TargetTransformInfo::TCC_Basic; break; case Intrinsic::memcpy: diff --git a/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll index 806bf251a10..c76585a51ef 100644 --- a/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -95,13 +95,13 @@ define void @cttz(i32 %a, <16 x i32> %va) { ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'cttz' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'cttz' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = 
call i32 @llvm.cttz.i32(i32 %a, i1 false) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) @@ -121,13 +121,13 @@ define void @ctlz(i32 %a, <16 x i32> %va) { ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'ctlz' -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) +; SIZE-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'ctlz' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call 
i32 @llvm.ctlz.i32(i32 %a, i1 true)