1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

Only attempt to detect AVG if SSE2 is available

Summary:
In PR27973 Sanjay Patel reported an assertion failure when a certain
loop was optimized, for a target without SSE2 support.  It turned out
this was because of the AVG pattern detection introduced in rL253952.

Prevent the assertion failure by bailing out early in
`detectAVGPattern()`, if the target does not support SSE2.

Also add a minimized test case.

Reviewers: congh, eli.friedman, spatel

Subscribers: emaste, llvm-commits

Differential Revision: http://reviews.llvm.org/D20905

llvm-svn: 271548
This commit is contained in:
Dimitry Andric 2016-06-02 17:30:49 +00:00
parent 11989dd404
commit 9beb691de0
2 changed files with 34 additions and 0 deletions

View File

@ -27793,6 +27793,8 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
return SDValue();
if (!Subtarget.hasSSE2())
return SDValue();
if (Subtarget.hasAVX512()) {
if (VT.getSizeInBits() > 512)
return SDValue();

View File

@ -0,0 +1,32 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: asserts
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s
;
; Regression test for the x86 AVG pattern detection when SSE2 is disabled
; (note the -mattr=-sse2 on the llc command line above). The function body is
; the zext / add 1 / lshr 1 / trunc sequence on <16 x i8>; without SSE2 the
; backend must not try to form a vector average from it and should instead
; emit the plain scalar byte stores listed in the expected output below.
define <16 x i8> @PR27973() {
; CHECK-LABEL: PR27973:
; CHECK: # BB#0:
; CHECK-NEXT: movb $0, 15(%rdi)
; CHECK-NEXT: movb $0, 14(%rdi)
; CHECK-NEXT: movb $0, 13(%rdi)
; CHECK-NEXT: movb $0, 12(%rdi)
; CHECK-NEXT: movb $0, 11(%rdi)
; CHECK-NEXT: movb $0, 10(%rdi)
; CHECK-NEXT: movb $0, 9(%rdi)
; CHECK-NEXT: movb $0, 8(%rdi)
; CHECK-NEXT: movb $0, 7(%rdi)
; CHECK-NEXT: movb $0, 6(%rdi)
; CHECK-NEXT: movb $0, 5(%rdi)
; CHECK-NEXT: movb $0, 4(%rdi)
; CHECK-NEXT: movb $0, 3(%rdi)
; CHECK-NEXT: movb $0, 2(%rdi)
; CHECK-NEXT: movb $0, 1(%rdi)
; CHECK-NEXT: movb $0, (%rdi)
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
;
; Widen each i8 lane to i32, add 1, shift right by 1, then truncate back to
; i8. The input is an all-zero vector, so every result lane is (0 + 1) >> 1 = 0,
; which matches the sixteen zero byte stores expected above.
%t0 = zext <16 x i8> zeroinitializer to <16 x i32>
%t1 = add nuw nsw <16 x i32> %t0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%t2 = lshr <16 x i32> %t1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%t3 = trunc <16 x i32> %t2 to <16 x i8>
ret <16 x i8> %t3
}