From 18290a4a326f2fe726a1ec2e9bb33b7b69862f04 Mon Sep 17 00:00:00 2001
From: Joe Ellis <joe.ellis@arm.com>
Date: Mon, 26 Oct 2020 17:40:01 +0000
Subject: [PATCH] [SVE][AArch64] Fix TypeSize warning in loop vectorization
 legality

The warning fired when isDereferenceableAndAlignedInLoop was called with a
scalable load: computing the element size relied on the now-deprecated
implicit conversion of TypeSize to uint64_t through the overloaded
operator.

This patch fixes this issue by:

- no longer considering vector loads as candidates in
  canVectorizeWithIfConvert. Considering them doesn't make sense in the
  context of identifying scalar loads to vectorize.

- making use of getFixedSize inside isDereferenceableAndAlignedInLoop --
  this removes the dependency on the deprecated interface, and will
  trigger an assertion error if the function is ever called with a
  scalable type.

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D89798
---
 lib/Analysis/Loads.cpp                        |  2 +-
 .../Vectorize/LoopVectorizationLegality.cpp   |  2 +-
 .../AArch64/sve-scalable-load-in-loop.ll      | 49 +++++++++++++++++++
 3 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100644 test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index d63f6b9b196..5ca5384b1fd 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -199,7 +199,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
   Value *Ptr = LI->getPointerOperand();
 
   APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
-                DL.getTypeStoreSize(LI->getType()));
+                DL.getTypeStoreSize(LI->getType()).getFixedSize());
   const Align Alignment = LI->getAlign();
 
   Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
diff --git a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index bdb2570c9e7..622f5b6466e 100644
--- a/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1009,7 +1009,7 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
     ScalarEvolution &SE = *PSE.getSE();
     for (Instruction &I : *BB) {
       LoadInst *LI = dyn_cast<LoadInst>(&I);
-      if (LI && !mustSuppressSpeculation(*LI) &&
+      if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
         SafePointers.insert(LI->getPointerOperand());
     }
diff --git a/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll b/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll
new file mode 100644
index 00000000000..9aa8c4d25d9
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/sve-scalable-load-in-loop.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -O2 -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; This test checks that a scalable load inside a loop does not trigger a
+; TypeSize warning in the loop vectorization legality analysis. A
+; scalable/vector load can appear inside a loop at vectorization legality
+; analysis if, for example, the Arm C Language Extensions (ACLE) are used.
+; If we encounter a scalable/vector load, it should not be considered for
+; analysis, and we should not see a TypeSize warning.
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning: {{.*}}TypeSize is not scalable
+
+; #include <arm_sve.h>
+;
+; void scalable_load_in_loop(long n, int *a, int *b, svuint32_t *x,
+;                            svuint32_t *y) {
+;     for (unsigned i = 0; i < n; i++) {
+;         if (i % 2 == 0) continue;
+;         a[i] = 2 * b[i];
+;         *x = *y;
+;     }
+; }
+
+; CHECK-LABEL: @scalable_load_in_loop
+; CHECK-NOT: vector.body
+define void @scalable_load_in_loop(i64 %n, <vscale x 4 x i32>* %x, <vscale x 4 x i32>* %y) {
+entry:  ; Loop whose conditionally executed block loads and stores a scalable vector.
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %inc, %for.inc ], [ 0, %entry ]  ; 0 on entry, %inc from the latch
+  %rem = and i32 %i, 1  ; isolate bit 0 of %i
+  %cmp = icmp eq i32 %rem, 0
+  br i1 %cmp, label %for.inc, label %if.end  ; bit 0 clear (even %i) skips if.end
+
+if.end:  ; reached only when bit 0 of %i is set
+  %0 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %y  ; scalable-vector load inside the loop
+  store <vscale x 4 x i32> %0, <vscale x 4 x i32>* %x  ; copy the loaded value to %x
+  br label %for.inc
+
+for.inc:  ; loop latch
+  %inc = add i32 %i, 1
+  %cmp2 = icmp slt i64 0, %n  ; compares the constant 0 with %n; %i is not involved
+  br i1 %cmp2, label %for.body, label %for.cleanup  ; back edge taken while 0 < %n
+
+for.cleanup:
+  ret void
+}