From d259f6577aec3bed4d3f0e5f0da14bcddeafd1b3 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Tue, 8 Jun 2021 10:49:22 +0100 Subject: [PATCH] [CostModel] Return an invalid cost for memory ops with unsupported types Fixes getTypeConversion to return `TypeScalarizeScalableVector` when a scalable vector type cannot be legalized by widening/splitting. When this is the method of legalization found, getTypeLegalizationCost will return an Invalid cost. The getMemoryOpCost, getMaskedMemoryOpCost & getGatherScatterOpCost functions already call getTypeLegalizationCost and will now also return an Invalid cost for unsupported types. Reviewed By: sdesmalen, david-arm Differential Revision: https://reviews.llvm.org/D102515 --- lib/CodeGen/TargetLoweringBase.cpp | 9 +++-- .../AArch64/AArch64TargetTransformInfo.cpp | 7 ++++ .../CostModel/AArch64/sve-illegal-types.ll | 40 +++++++++++++++++++ .../LoopVectorize/AArch64/scalable-vf-hint.ll | 26 ++++++------ .../AArch64/extract-cmp-binop.ll | 21 ++++++++++ .../VectorCombine/X86/extract-cmp-binop.ll | 19 --------- unittests/CodeGen/AArch64SelectionDAGTest.cpp | 8 ++-- 7 files changed, 93 insertions(+), 37 deletions(-) create mode 100644 test/Analysis/CostModel/AArch64/sve-illegal-types.ll create mode 100644 test/Transforms/VectorCombine/AArch64/extract-cmp-binop.ll diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index f4e3fad1108..d2c291f2ae7 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -1016,8 +1016,8 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { // If type is to be expanded, split the vector. // <4 x i140> -> <2 x i140> if (LK.first == TypeExpandInteger) { - if (VT.getVectorElementCount() == ElementCount::getScalable(1)) - report_fatal_error("Cannot legalize this scalable vector"); + if (VT.getVectorElementCount().isScalable()) + return LegalizeKind(TypeScalarizeScalableVector, EltVT); return LegalizeKind(TypeSplitVector, VT.getHalfNumVectorElementsVT(Context)); } @@ -1080,7 +1080,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { } if (VT.getVectorElementCount() == ElementCount::getScalable(1)) - report_fatal_error("Cannot legalize this vector"); + return LegalizeKind(TypeScalarizeScalableVector, EltVT); // Vectors with illegal element types are expanded. EVT NVT = EVT::getVectorVT(Context, EltVT, @@ -1845,6 +1845,9 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, while (true) { LegalizeKind LK = getTypeConversion(C, MTy); + if (LK.first == TypeScalarizeScalableVector) + return std::make_pair(InstructionCost::getInvalid(), MVT::getVT(Ty)); + if (LK.first == TypeLegal) return std::make_pair(Cost, MTy.getSimpleVT()); diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 70d1d6a5333..7f02023322d 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1288,6 +1288,8 @@ AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); auto LT = TLI->getTypeLegalizationCost(DL, Src); + if (!LT.first.isValid()) + return InstructionCost::getInvalid(); return LT.first * 2; } @@ -1300,6 +1302,9 @@ InstructionCost AArch64TTIImpl::getGatherScatterOpCost( Alignment, CostKind, I); auto *VT = cast(DataTy); auto LT = TLI->getTypeLegalizationCost(DL, DataTy); + if (!LT.first.isValid()) + return InstructionCost::getInvalid(); + ElementCount LegalVF = LT.second.getVectorElementCount(); Optional MaxNumVScale = getMaxVScale(); assert(MaxNumVScale && "Expected valid max vscale value"); @@ -1326,6 +1331,8 @@ InstructionCost AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, CostKind); auto LT = TLI->getTypeLegalizationCost(DL, Ty); + if (!LT.first.isValid()) + return InstructionCost::getInvalid(); // TODO: consider latency as well for TCK_SizeAndLatency. if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) diff --git a/test/Analysis/CostModel/AArch64/sve-illegal-types.ll b/test/Analysis/CostModel/AArch64/sve-illegal-types.ll new file mode 100644 index 00000000000..eeb569dbb28 --- /dev/null +++ b/test/Analysis/CostModel/AArch64/sve-illegal-types.ll @@ -0,0 +1,40 @@ +; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s + +define void @load_store(* %ptrs) { +; CHECK-LABEL: 'load_store' +; CHECK-NEXT: Invalid cost for instruction: %load1 = load , * undef +; CHECK-NEXT: Invalid cost for instruction: %load2 = load , * undef +; CHECK-NEXT: Invalid cost for instruction: %load3 = load , * undef +; CHECK-NEXT: Invalid cost for instruction: %load4 = load , * undef +; CHECK-NEXT: Invalid cost for instruction: store %load1, * %ptrs + %load1 = load , * undef + %load2 = load , * undef + %load3 = load , * undef + %load4 = load , * undef + store %load1, * %ptrs + ret void +} + +define void @masked_load_store(* %ptrs, * %val, %mask, %passthru) { +; CHECK-LABEL: 'masked_load_store' +; CHECK-NEXT: Invalid cost for instruction: %mload = call @llvm.masked.load.nxv1i128.p0nxv1i128(* %val, i32 8, %mask, %passthru) +; CHECK-NEXT: Invalid cost for instruction: call void @llvm.masked.store.nxv1i128.p0nxv1i128( %mload, * %ptrs, i32 8, %mask) + %mload = call @llvm.masked.load.nxv1i128(* %val, i32 8, %mask, %passthru) + call void @llvm.masked.store.nxv1i128( %mload, * %ptrs, i32 8, %mask) + ret void +} + +define void @masked_gather_scatter( %ptrs, %val, %mask, %passthru) { +; CHECK-LABEL: 'masked_gather_scatter' +; CHECK-NEXT: Invalid cost for instruction: %mgather = call @llvm.masked.gather.nxv1i128.nxv1p0i128( %val, i32 0, %mask, %passthru) +; CHECK-NEXT: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i128.nxv1p0i128( %mgather, %ptrs, i32 0, %mask) + %mgather = call @llvm.masked.gather.nxv1i128( %val, i32 0, %mask, %passthru) + call void @llvm.masked.scatter.nxv1i128( %mgather, %ptrs, i32 0, %mask) + ret void +} + +declare @llvm.masked.load.nxv1i128(*, i32, , ) +declare @llvm.masked.gather.nxv1i128(, i32, , ) + +declare void @llvm.masked.store.nxv1i128(, *, i32, ) +declare void @llvm.masked.scatter.nxv1i128(, , i32, ) diff --git a/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll index a29cf1823f6..2e600d461e8 100644 --- a/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll +++ b/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll @@ -1,8 +1,8 @@ ; REQUIRES: asserts ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck %s ; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s -; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SVE %s -; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -force-target-supports-scalable-vectors=true -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-MAX-VSCALE %s +; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S -scalable-vectorization=on < %s 2>%t | FileCheck --check-prefix=CHECK-NO-SVE %s +; RUN: cat %t | FileCheck %s -check-prefix=CHECK-NO-SVE-REMARKS target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -309,11 +309,12 @@ exit: !16 = !{!"llvm.loop.vectorize.width", i32 16} !17 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} -; CHECK-NO-SVE-LABEL: LV: Checking a loop in "test_no_sve" -; CHECK-NO-SVE: LV: Disabling scalable vectorization, because target does not support scalable vectors. -; CHECK-NO-SVE: remark: :0:0: Disabling scalable vectorization, because target does not support scalable vectors. -; CHECK-NO-SVE: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF. -; CHECK-NO-SVE: LV: Selecting VF: 4. +; CHECK-NO-SVE-REMARKS-LABEL: LV: Checking a loop in "test_no_sve" +; CHECK-NO-SVE-REMARKS: LV: Disabling scalable vectorization, because target does not support scalable vectors. +; CHECK-NO-SVE-REMARKS: remark: :0:0: Disabling scalable vectorization, because target does not support scalable vectors. +; CHECK-NO-SVE-REMARKS: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF. +; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4. +; CHECK-NO-SVE-LABEL: @test_no_sve ; CHECK-NO-SVE: <4 x i32> ; CHECK-NO-SVE-NOT: define void @test_no_sve(i32* %a, i32* %b) { @@ -343,11 +344,12 @@ exit: ; Test the LV falls back to fixed-width vectorization if scalable vectors are ; supported but max vscale is undefined. ; -; CHECK-NO-MAX-VSCALE-LABEL: LV: Checking a loop in "test_no_max_vscale" -; CEHCK-NO-MAX-VSCALE: The max safe fixed VF is: 4. -; CHECK-NO-MAX-VSCALE: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF. -; CHECK-NO-MAX-VSCALE: LV: Selecting VF: 4. -; CHECK-NO-MAX-VSCALE: <4 x i32> +; CHECK-NO-SVE-REMARKS-LABEL: LV: Checking a loop in "test_no_max_vscale" +; CHECK-NO-SVE-REMARKS: The max safe fixed VF is: 4. +; CHECK-NO-SVE-REMARKS: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF. +; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4. +; CHECK-NO-SVE-LABEL: @test_no_max_vscale +; CHECK-NO-SVE: <4 x i32> define void @test_no_max_vscale(i32* %a, i32* %b) { entry: br label %loop diff --git a/test/Transforms/VectorCombine/AArch64/extract-cmp-binop.ll b/test/Transforms/VectorCombine/AArch64/extract-cmp-binop.ll new file mode 100644 index 00000000000..bf61b3b2b73 --- /dev/null +++ b/test/Transforms/VectorCombine/AArch64/extract-cmp-binop.ll @@ -0,0 +1,21 @@ +; RUN: opt -vector-combine -S %s | FileCheck %s + +; Negative test for extract + cmp + binop - don't try this with scalable vectors. +; Moved from X86/extract-cmp-binop.ll + +define i1 @scalable( %a) { +; CHECK-LABEL: @scalable( +; CHECK-NEXT: [[E1:%.*]] = extractelement [[A:%.*]], i32 3 +; CHECK-NEXT: [[E2:%.*]] = extractelement [[A]], i32 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[E1]], 42 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[E2]], -8 +; CHECK-NEXT: [[R:%.*]] = xor i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: ret i1 [[R]] +; + %e1 = extractelement %a, i32 3 + %e2 = extractelement %a, i32 1 + %cmp1 = icmp sgt i32 %e1, 42 + %cmp2 = icmp sgt i32 %e2, -8 + %r = xor i1 %cmp1, %cmp2 + ret i1 %r +} diff --git a/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll b/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll index 1a0d77e547f..73e52c13a46 100644 --- a/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll +++ b/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll @@ -148,22 +148,3 @@ define i1 @different_source_vec(<4 x i32> %a, <4 x i32> %b) { %r = and i1 %cmp1, %cmp2 ret i1 %r } - -; Negative test - don't try this with scalable vectors. - -define i1 @scalable( %a) { -; CHECK-LABEL: @scalable( -; CHECK-NEXT: [[E1:%.*]] = extractelement [[A:%.*]], i32 3 -; CHECK-NEXT: [[E2:%.*]] = extractelement [[A]], i32 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[E1]], 42 -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[E2]], -8 -; CHECK-NEXT: [[R:%.*]] = xor i1 [[CMP1]], [[CMP2]] -; CHECK-NEXT: ret i1 [[R]] -; - %e1 = extractelement %a, i32 3 - %e2 = extractelement %a, i32 1 - %cmp1 = icmp sgt i32 %e1, 42 - %cmp2 = icmp sgt i32 %e2, -8 - %r = xor i1 %cmp1, %cmp2 - ret i1 %r -} diff --git a/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/unittests/CodeGen/AArch64SelectionDAGTest.cpp index 0024d836873..03f1fdcf37d 100644 --- a/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/unittests/CodeGen/AArch64SelectionDAGTest.cpp @@ -573,9 +573,11 @@ TEST_F(AArch64SelectionDAGTest, getTypeConversion_WidenScalableEVT) { EXPECT_EQ(getTypeToTransformTo(FromVT), ToVT); } -TEST_F(AArch64SelectionDAGTest, getTypeConversion_NoScalarizeEVT_nxv1f128) { - EVT FromVT = EVT::getVectorVT(Context, MVT::f128, 1, true); - EXPECT_DEATH(getTypeAction(FromVT), "Cannot legalize this vector"); +TEST_F(AArch64SelectionDAGTest, + getTypeConversion_ScalarizeScalableEVT_nxv1f128) { + EVT VT = EVT::getVectorVT(Context, MVT::f128, ElementCount::getScalable(1)); + EXPECT_EQ(getTypeAction(VT), TargetLoweringBase::TypeScalarizeScalableVector); + EXPECT_EQ(getTypeToTransformTo(VT), MVT::f128); } TEST_F(AArch64SelectionDAGTest, TestFold_STEP_VECTOR) {