Only attempt to detect AVG if SSE2 is available

Summary: In PR27973, Sanjay Patel reported an assertion failure when a certain loop was optimized, for a target without SSE2 support. It turned out this was because of the AVG pattern detection introduced in rL253952. Prevent the assertion failure by bailing out early in `detectAVGPattern()`, if the target does not support SSE2. Also add a minimized test case. Reviewers: congh, eli.friedman, spatel Subscribers: emaste, llvm-commits Differential Revision: http://reviews.llvm.org/D20905 llvm-svn: 271548
2024-10-19 19:12:56 +02:00 · 2016-06-02 17:30:49 +00:00 · 2016-06-02 17:30:49 +00:00 · 9beb691de0
commit 9beb691de0
parent 11989dd404
2 changed files with 34 additions and 0 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -27793,6 +27793,8 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
  if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
    return SDValue();

+  if (!Subtarget.hasSSE2())
+    return SDValue();
  if (Subtarget.hasAVX512()) {
    if (VT.getSizeInBits() > 512)
      return SDValue();
--- a/test/CodeGen/X86/no-sse2-avg.ll
+++ b/test/CodeGen/X86/no-sse2-avg.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s
+
+define <16 x i8> @PR27973() {
+; CHECK-LABEL: PR27973:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movb $0, 15(%rdi)
+; CHECK-NEXT:    movb $0, 14(%rdi)
+; CHECK-NEXT:    movb $0, 13(%rdi)
+; CHECK-NEXT:    movb $0, 12(%rdi)
+; CHECK-NEXT:    movb $0, 11(%rdi)
+; CHECK-NEXT:    movb $0, 10(%rdi)
+; CHECK-NEXT:    movb $0, 9(%rdi)
+; CHECK-NEXT:    movb $0, 8(%rdi)
+; CHECK-NEXT:    movb $0, 7(%rdi)
+; CHECK-NEXT:    movb $0, 6(%rdi)
+; CHECK-NEXT:    movb $0, 5(%rdi)
+; CHECK-NEXT:    movb $0, 4(%rdi)
+; CHECK-NEXT:    movb $0, 3(%rdi)
+; CHECK-NEXT:    movb $0, 2(%rdi)
+; CHECK-NEXT:    movb $0, 1(%rdi)
+; CHECK-NEXT:    movb $0, (%rdi)
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    retq
+;
+  %t0 = zext <16 x i8> zeroinitializer to <16 x i32>
+  %t1 = add nuw nsw <16 x i32> %t0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %t2 = lshr <16 x i32> %t1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %t3 = trunc <16 x i32> %t2 to <16 x i8>
+  ret <16 x i8> %t3
+}