1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00
llvm-mirror/test/CodeGen/X86/movmsk.ll
Nirav Dave 6d5f31ab18 [DAGCombine] Prune unused nodes.
Summary:
Nodes that have no uses are eventually pruned when they are selected
from the worklist. Record nodes newly added to the worklist or DAG and
perform pruning after every combine attempt.

Reviewers: efriedma, RKSimon, craig.topper, spatel, jyknight

Reviewed By: jyknight

Subscribers: jdoerfert, jyknight, nemanjai, jvesely, nhaehnle, javed.absar, hiraditya, jsji, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D58070

llvm-svn: 357283
2019-03-29 17:35:56 +00:00

182 lines
6.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.6.6 -mattr=+sse4.1 | FileCheck %s
; This file pins the x86-64 code generated for sign-bit extraction from scalar
; floating-point values and for uses of the movmskps / movmskpd / pmovmskb
; intrinsics. The expected-output comments inside each function are generated
; by the script named above, so they should be regenerated rather than edited
; by hand.

; Single-field aggregates used as the type of one stack slot in the scalar
; sign-bit tests below: %0 wraps a double, %union.anon wraps a float.
%0 = type { double }
%union.anon = type { float }
; Sign bit of a scalar double, returned as an i32 that is either 0 or 1.
; The IR bitcasts %d1 to i64, shifts it right by 63 and truncates to i32.
; The expected assembly reads the sign with movmskpd on %xmm0 and masks the
; result with 1. It also contains two movsd stores to the stack, matching the
; two stores of %d1 in the IR.
define i32 @double_signbit(double %d1) nounwind uwtable readnone ssp {
; CHECK-LABEL: double_signbit:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movmskpd %xmm0, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
entry:
; Two stack slots; both are written below and never loaded in this function.
%__x.addr.i = alloca double, align 8
%__u.i = alloca %0, align 8
; These two i8* casts have no users in this function.
%0 = bitcast double* %__x.addr.i to i8*
%1 = bitcast %0* %__u.i to i8*
store double %d1, double* %__x.addr.i, align 8
; Address of field 0 (the double) of the %0 aggregate.
%__f.i = getelementptr inbounds %0, %0* %__u.i, i64 0, i32 0
store double %d1, double* %__f.i, align 8
; Sign-bit computation: bit 63 of the double's bit pattern moved to bit 0.
%tmp = bitcast double %d1 to i64
%tmp1 = lshr i64 %tmp, 63
%shr.i = trunc i64 %tmp1 to i32
ret i32 %shr.i
}
; Sign bit of the sum of two doubles, returned as an i32 that is either 0 or 1.
; Same shape as the plain double sign-bit test, but the value examined is the
; result of an fadd. The expected assembly is addsd, two movsd stack stores of
; the sum, then movmskpd on %xmm0 masked with 1.
define i32 @double_add_signbit(double %d1, double %d2) nounwind uwtable readnone ssp {
; CHECK-LABEL: double_add_signbit:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: addsd %xmm1, %xmm0
; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movmskpd %xmm0, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
entry:
; Two stack slots; both are written below and never loaded in this function.
%__x.addr.i = alloca double, align 8
%__u.i = alloca %0, align 8
%add = fadd double %d1, %d2
; These two i8* casts have no users in this function.
%0 = bitcast double* %__x.addr.i to i8*
%1 = bitcast %0* %__u.i to i8*
store double %add, double* %__x.addr.i, align 8
; Address of field 0 (the double) of the %0 aggregate.
%__f.i = getelementptr inbounds %0, %0* %__u.i, i64 0, i32 0
store double %add, double* %__f.i, align 8
; Sign-bit computation: bit 63 of the sum's bit pattern moved to bit 0.
%tmp = bitcast double %add to i64
%tmp1 = lshr i64 %tmp, 63
%shr.i = trunc i64 %tmp1 to i32
ret i32 %shr.i
}
; Sign bit of a scalar float, returned as an i32 that is either 0 or 1.
; The IR bitcasts %f1 to i32 and shifts it right by 31. The expected assembly
; reads the sign with movmskps on %xmm0 and masks the result with 1. It also
; contains two movss stores to the stack, matching the two stores of %f1.
define i32 @float_signbit(float %f1) nounwind uwtable readnone ssp {
; CHECK-LABEL: float_signbit:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
entry:
; Two stack slots; both are written below and never loaded in this function.
%__x.addr.i = alloca float, align 4
%__u.i = alloca %union.anon, align 4
; These two i8* casts have no users in this function.
%0 = bitcast float* %__x.addr.i to i8*
%1 = bitcast %union.anon* %__u.i to i8*
store float %f1, float* %__x.addr.i, align 4
; Address of field 0 (the float) of the %union.anon aggregate.
%__f.i = getelementptr inbounds %union.anon, %union.anon* %__u.i, i64 0, i32 0
store float %f1, float* %__f.i, align 4
; Sign-bit computation: bit 31 of the float's bit pattern moved to bit 0.
%2 = bitcast float %f1 to i32
%shr.i = lshr i32 %2, 31
ret i32 %shr.i
}
; Sign bit of the sum of two floats, returned as an i32 that is either 0 or 1.
; Same shape as the plain float sign-bit test, but the value examined is the
; result of an fadd. The expected assembly is addss, two movss stack stores of
; the sum, then movmskps on %xmm0 masked with 1.
define i32 @float_add_signbit(float %f1, float %f2) nounwind uwtable readnone ssp {
; CHECK-LABEL: float_add_signbit:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: addss %xmm1, %xmm0
; CHECK-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
entry:
; Two stack slots; both are written below and never loaded in this function.
%__x.addr.i = alloca float, align 4
%__u.i = alloca %union.anon, align 4
%add = fadd float %f1, %f2
; These two i8* casts have no users in this function.
%0 = bitcast float* %__x.addr.i to i8*
%1 = bitcast %union.anon* %__u.i to i8*
store float %add, float* %__x.addr.i, align 4
; Address of field 0 (the float) of the %union.anon aggregate.
%__f.i = getelementptr inbounds %union.anon, %union.anon* %__u.i, i64 0, i32 0
store float %add, float* %__f.i, align 4
; Sign-bit computation: bit 31 of the sum's bit pattern moved to bit 0.
%2 = bitcast float %add to i32
%shr.i = lshr i32 %2, 31
ret i32 %shr.i
}
; PR11570
; Sign test expressed as a signed comparison instead of a shift: the argument
; (a double, despite the function name) is bitcast to i64 and compared
; "less than zero". The i1 result is passed, zero-extended, to the callee in a
; tail call. The expected assembly materializes the flag with movmskpd into
; %edi, masks it with 1 and jumps to the callee.
define void @float_call_signbit(double %n) {
; CHECK-LABEL: float_call_signbit:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movmskpd %xmm0, %edi
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: jmp _float_call_signbit_callee ## TAILCALL
entry:
%t0 = bitcast double %n to i64
; True exactly when the i64 bit pattern is negative, i.e. when bit 63 is set.
%tobool.i.i.i.i = icmp slt i64 %t0, 0
tail call void @float_call_signbit_callee(i1 zeroext %tobool.i.i.i.i)
ret void
}
; External callee for the test above; only declared here, never defined.
declare void @float_call_signbit_callee(i1 zeroext)
; Known zeros
; The result of movmsk.pd on <2 x double> is masked with 3 in the IR. The
; expected assembly is just movmskpd followed by retq, with no "and"
; instruction, so the mask is expected to be recognized as redundant.
define i32 @knownbits_v2f64(<2 x double> %x) {
; CHECK-LABEL: knownbits_v2f64:
; CHECK: ## %bb.0:
; CHECK-NEXT: movmskpd %xmm0, %eax
; CHECK-NEXT: retq
%1 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %x)
; 3 keeps only bits 0 and 1 of the intrinsic result.
%2 = and i32 %1, 3
ret i32 %2
}
; Don't demand any movmsk signbits -> zero
; The result of pmovmskb.128 on <16 x i8> is masked with 65536, which keeps
; only bit 16. The expected assembly does not execute the movmsk at all: it
; zeroes %eax with xorl and returns.
define i32 @demandedbits_v16i8(<16 x i8> %x) {
; CHECK-LABEL: demandedbits_v16i8:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
%1 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %x)
; 65536 == 1 << 16.
%2 = and i32 %1, 65536
ret i32 %2
}
; Simplify demanded vector elts
; The input is first shuffled with an all-zero mask (every lane takes element 0
; of %x), then passed to movmsk.ps, and only bit 0 of the result is kept. The
; expected assembly applies movmskps directly to %xmm0 and masks with 1; no
; shuffle instruction appears in it.
define i32 @demandedelts_v4f32(<4 x float> %x) {
; CHECK-LABEL: demandedelts_v4f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: retq
; Splat of element 0 across all four lanes.
%1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> zeroinitializer
%2 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %1)
%3 = and i32 %2, 1
ret i32 %3
}
; rdar://10247336
; movmskp{s|d} only set low 4/2 bits, high bits are known zero
; Here the movmsk.ps result is sign-extended to i64 and used to index an i32
; table. The expected assembly uses %rax directly in the load's addressing
; mode right after movmskps, with no separate sign-extension instruction.
define i32 @t1(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
; CHECK-LABEL: t1:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movmskps %xmm0, %eax
; CHECK-NEXT: movl (%rdi,%rax,4), %eax
; CHECK-NEXT: retq
entry:
%0 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %x) nounwind
; Sign extension of the mask value to form a 64-bit table index.
%idxprom = sext i32 %0 to i64
%arrayidx = getelementptr inbounds i32, i32* %indexTable, i64 %idxprom
%1 = load i32, i32* %arrayidx, align 4
ret i32 %1
}
; Table lookup indexed by a movmsk.pd result. The <4 x float> argument is
; bitcast to <2 x double> before the intrinsic call, and the i32 result is
; sign-extended to i64 to index an i32 table. The expected assembly uses %rax
; directly in the load's addressing mode right after movmskpd, with no
; separate sign-extension instruction.
define i32 @t2(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
; CHECK-LABEL: t2:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movmskpd %xmm0, %eax
; CHECK-NEXT: movl (%rdi,%rax,4), %eax
; CHECK-NEXT: retq
entry:
; Reinterpretation of the same 128 bits as two doubles; no value conversion.
%0 = bitcast <4 x float> %x to <2 x double>
%1 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %0) nounwind
; Sign extension of the mask value to form a 64-bit table index.
%idxprom = sext i32 %1 to i64
%arrayidx = getelementptr inbounds i32, i32* %indexTable, i64 %idxprom
%2 = load i32, i32* %arrayidx, align 4
ret i32 %2
}
; Declarations of the x86 move-mask intrinsics called by the tests above.
; Each takes one 128-bit vector and returns an i32.
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone