mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
cd7dee863e
This initial version only peeks through cases where we just demand the sign bit of an ashr shift, but we could generalize this further depending on how many sign bits we already have. The pr18014.ll case is a minor annoyance - we've failed to move the psrad/paddd after the blendvps, which would have avoided the extra move, but we have still increased the ILP.
25 lines
918 B
LLVM
25 lines
918 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s

; Ensure PSRAD is generated as the condition is consumed by both PADD and
; BLENDVPS. PADD requires all bits to be set properly.

; @foo feeds one <4 x i1> condition to two different consumers:
;   * %cond is sign-extended to <4 x i32> and added to %v1; the sum (%t1) is
;     the return value.
;   * %cond also selects lane-wise between %v1 and %v2; the result (%t2) is
;     stored through %p.
; In the expected assembly below, psrad $31 is applied to a copy (%xmm3) that
; feeds paddd, while blendvps takes %xmm0 as left by pslld $31.
; %v3 is not referenced by the body.
define <4 x i32> @foo(<4 x i32>* %p, <4 x i1> %cond, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT:    pslld $31, %xmm0
; CHECK-NEXT:    movdqa %xmm0, %xmm3
; CHECK-NEXT:    psrad $31, %xmm3
; CHECK-NEXT:    paddd %xmm1, %xmm3
; CHECK-NEXT:    blendvps %xmm0, %xmm1, %xmm2
; CHECK-NEXT:    movaps %xmm2, (%rdi)
; CHECK-NEXT:    movdqa %xmm3, %xmm0
; CHECK-NEXT:    retq
  ; Widen each i1 lane to a full 32-bit lane via sign extension.
  %sext_cond = sext <4 x i1> %cond to <4 x i32>
  ; Arithmetic consumer of the widened condition.
  %t1 = add <4 x i32> %v1, %sext_cond
  ; Select consumer of the original i1 condition.
  %t2 = select <4 x i1> %cond, <4 x i32> %v1, <4 x i32> %v2
  store <4 x i32> %t2, <4 x i32>* %p
  ret <4 x i32> %t1
}
|
|
|