; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.6.6 -mattr=+sse4.1 | FileCheck %s

%0 = type { double }
%union.anon = type { float }

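; The sign-bit extract of a double (lshr of the bitcast by 63) should lower to
; movmskpd + andl $1.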
define i32 @double_signbit(double %d1) nounwind uwtable readnone ssp {
; CHECK-LABEL: double_signbit:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movmskpd %xmm0, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
entry:
  %__x.addr.i = alloca double, align 8
  %__u.i = alloca %0, align 8
  %0 = bitcast double* %__x.addr.i to i8*
  %1 = bitcast %0* %__u.i to i8*
  store double %d1, double* %__x.addr.i, align 8
  %__f.i = getelementptr inbounds %0, %0* %__u.i, i64 0, i32 0
  store double %d1, double* %__f.i, align 8
  %tmp = bitcast double %d1 to i64
  %tmp1 = lshr i64 %tmp, 63
  %shr.i = trunc i64 %tmp1 to i32
  ret i32 %shr.i
}

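; Same for the sign bit of a sum: the fadd result feeds movmskpd + andl $1.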
define i32 @double_add_signbit(double %d1, double %d2) nounwind uwtable readnone ssp {
; CHECK-LABEL: double_add_signbit:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    addsd %xmm1, %xmm0
; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movmskpd %xmm0, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
entry:
  %__x.addr.i = alloca double, align 8
  %__u.i = alloca %0, align 8
  %add = fadd double %d1, %d2
  %0 = bitcast double* %__x.addr.i to i8*
  %1 = bitcast %0* %__u.i to i8*
  store double %add, double* %__x.addr.i, align 8
  %__f.i = getelementptr inbounds %0, %0* %__u.i, i64 0, i32 0
  store double %add, double* %__f.i, align 8
  %tmp = bitcast double %add to i64
  %tmp1 = lshr i64 %tmp, 63
  %shr.i = trunc i64 %tmp1 to i32
  ret i32 %shr.i
}

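; Float variant: the lshr of the bitcast by 31 should lower to movmskps + andl $1.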
define i32 @float_signbit(float %f1) nounwind uwtable readnone ssp {
; CHECK-LABEL: float_signbit:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movmskps %xmm0, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
entry:
  %__x.addr.i = alloca float, align 4
  %__u.i = alloca %union.anon, align 4
  %0 = bitcast float* %__x.addr.i to i8*
  %1 = bitcast %union.anon* %__u.i to i8*
  store float %f1, float* %__x.addr.i, align 4
  %__f.i = getelementptr inbounds %union.anon, %union.anon* %__u.i, i64 0, i32 0
  store float %f1, float* %__f.i, align 4
  %2 = bitcast float %f1 to i32
  %shr.i = lshr i32 %2, 31
  ret i32 %shr.i
}

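; Sign bit of a float sum: the fadd result feeds movmskps + andl $1.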
define i32 @float_add_signbit(float %f1, float %f2) nounwind uwtable readnone ssp {
; CHECK-LABEL: float_add_signbit:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    addss %xmm1, %xmm0
; CHECK-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movmskps %xmm0, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
entry:
  %__x.addr.i = alloca float, align 4
  %__u.i = alloca %union.anon, align 4
  %add = fadd float %f1, %f2
  %0 = bitcast float* %__x.addr.i to i8*
  %1 = bitcast %union.anon* %__u.i to i8*
  store float %add, float* %__x.addr.i, align 4
  %__f.i = getelementptr inbounds %union.anon, %union.anon* %__u.i, i64 0, i32 0
  store float %add, float* %__f.i, align 4
  %2 = bitcast float %add to i32
  %shr.i = lshr i32 %2, 31
  ret i32 %shr.i
}

; PR11570
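; The sign-bit compare (icmp slt of the bitcast value against 0) feeding the call
; should lower to movmskpd + andl, and the call should remain a tail call.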
define void @float_call_signbit(double %n) {
; CHECK-LABEL: float_call_signbit:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movmskpd %xmm0, %edi
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    jmp _float_call_signbit_callee ## TAILCALL
entry:
  %t0 = bitcast double %n to i64
  %tobool.i.i.i.i = icmp slt i64 %t0, 0
  tail call void @float_call_signbit_callee(i1 zeroext %tobool.i.i.i.i)
  ret void
}
declare void @float_call_signbit_callee(i1 zeroext)

; Known zeros
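; movmskpd only sets bits 0-1, so the 'and i32 %1, 3' is redundant and should be folded away.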
define i32 @knownbits_v2f64(<2 x double> %x) {
; CHECK-LABEL: knownbits_v2f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movmskpd %xmm0, %eax
; CHECK-NEXT:    retq
  %1 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %x)
  %2 = and i32 %1, 3
  ret i32 %2
}

; Don't demand any movmsk signbits -> zero
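; pmovmskb only sets bits 0-15; masking with 65536 (bit 16) demands none of them,
; so the whole expression folds to zero.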
define i32 @demandedbits_v16i8(<16 x i8> %x) {
; CHECK-LABEL: demandedbits_v16i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %1 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %x)
  %2 = and i32 %1, 65536
  ret i32 %2
}

; Simplify demanded vector elts
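; Only bit 0 of the movmskps result is used, so only lane 0 of the splat shuffle is
; demanded and the shuffle itself can be dropped.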
define i32 @demandedelts_v4f32(<4 x float> %x) {
; CHECK-LABEL: demandedelts_v4f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movmskps %xmm0, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> zeroinitializer
  %2 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %1)
  %3 = and i32 %2, 1
  ret i32 %3
}

; rdar://10247336
; movmskp{s|d} only set low 4/2 bits, high bits are known zero

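; Because the high bits are known zero, the movmskps result can be used as an index
; without an explicit sign extension.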
define i32 @t1(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
; CHECK-LABEL: t1:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movmskps %xmm0, %eax
; CHECK-NEXT:    movl (%rdi,%rax,4), %eax
; CHECK-NEXT:    retq
entry:
  %0 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %x) nounwind
  %idxprom = sext i32 %0 to i64
  %arrayidx = getelementptr inbounds i32, i32* %indexTable, i64 %idxprom
  %1 = load i32, i32* %arrayidx, align 4
  ret i32 %1
}

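; Same as t1, but going through a bitcast to <2 x double> and movmskpd.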
define i32 @t2(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
; CHECK-LABEL: t2:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movmskpd %xmm0, %eax
; CHECK-NEXT:    movl (%rdi,%rax,4), %eax
; CHECK-NEXT:    retq
entry:
  %0 = bitcast <4 x float> %x to <2 x double>
  %1 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %0) nounwind
  %idxprom = sext i32 %1 to i64
  %arrayidx = getelementptr inbounds i32, i32* %indexTable, i64 %idxprom
  %2 = load i32, i32* %arrayidx, align 4
  ret i32 %2
}

declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone