[BasicTTI] Set scalarization cost of scalable vector casts to Invalid.

When the types need legalization and BasicTTIImpl::getCastInstrCost can't determine the cost of a vector cast operation, it falls back to calculating scalarization costs. Instead of crashing on `cast<FixedVectorType>(DstVTy)` when the destination type is a scalable vector, return an Invalid cost. Reviewed By: david-arm Differential Revision: https://reviews.llvm.org/D106655
2025-01-31 12:41:49 +01:00 · 2021-07-24 13:41:40 +01:00 · 2021-07-24 13:41:40 +01:00 · 1f523effc3
commit 1f523effc3
parent d69614109a
2 changed files with 42 additions and 0 deletions
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@ -1004,6 +1004,10 @@ public:
                                          CostKind, I));
      }

+      // Scalarization cost is Invalid, can't assume any num elements.
+      if (isa<ScalableVectorType>(DstVTy))
+        return InstructionCost::getInvalid();
+
      // In other cases where the source or destination are illegal, assume
      // the operation will get scalarized.
      unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
--- a/test/Analysis/CostModel/AArch64/sve-fptoi.ll
+++ b/test/Analysis/CostModel/AArch64/sve-fptoi.ll
@ -5,6 +5,24 @@ target triple = "aarch64-unknown-linux-gnu"

 define void @sve-fptoi() {
 ; CHECK-LABEL: 'sve-fptoi'
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f16_to_si8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f16_to_ui8 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f16_to_si32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f16_to_ui32 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i32>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f16_to_si64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f16_to_ui64 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i64>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f32_to_si8 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f32_to_ui8 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f32_to_si16 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i16>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f32_to_ui16 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i16>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f32_to_si64 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i64>
+; CHECK-NEXT: Cost Model: Invalid cost for instruction: %nv1f32_to_ui64 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i64>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv1f64_to_si8 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv1f64_to_ui8 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv1f64_to_si16 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv1f64_to_ui16 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i16>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv1f64_to_si32 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i32>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv1f64_to_ui32 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i32>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f16_to_si8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f16_to_ui8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %nv2f16_to_si32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32>
@ -59,6 +77,26 @@ define void @sve-fptoi() {
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction:   %nv8f64_to_ui16 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i16>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %nv8f64_to_si32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
 ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %nv8f64_to_ui32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
+  %nv1f16_to_si8  = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
+  %nv1f16_to_ui8  = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8>
+  %nv1f16_to_si32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32>
+  %nv1f16_to_ui32 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i32>
+  %nv1f16_to_si64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64>
+  %nv1f16_to_ui64 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i64>
+
+  %nv1f32_to_si8  = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8>
+  %nv1f32_to_ui8  = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8>
+  %nv1f32_to_si16 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i16>
+  %nv1f32_to_ui16 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i16>
+  %nv1f32_to_si64 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i64>
+  %nv1f32_to_ui64 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i64>
+
+  %nv1f64_to_si8  = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8>
+  %nv1f64_to_ui8  = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8>
+  %nv1f64_to_si16 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i16>
+  %nv1f64_to_ui16 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i16>
+  %nv1f64_to_si32 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i32>
+  %nv1f64_to_ui32 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i32>

  %nv2f16_to_si8  = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
  %nv2f16_to_ui8  = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>