1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00
llvm-mirror/test/CodeGen/X86/avx512-cmp.ll
Craig Topper fe9ffa8ba3 [LegalizeVectorOps] Improve handling of multi-result operations.
This system wasn't very well designed for multi-result nodes. As
a consequence they weren't consistently registered in the
LegalizedNodes map leading to nodes being revisited for different
results.

I've removed the "Result" variable from the main LegalizeOp method
and used a SDNode* instead. The result number from the incoming
Op SDValue is only used for deciding which result to return to the
caller. When LegalizeOp is called it should always register a
legalized result for all of its results. Future calls for any other
result should be pulled for the LegalizedNodes map.

Legal nodes will now register all of their results in the map
instead of just the one we were called for.

The Expand and Promote handling to use a vector of results similar
to LegalizeDAG. Each of the new results is then re-legalized and
logged in the LegalizedNodes map for all of the Results for the
node being legalized. None of the handles register their own
results now. And none call ReplaceAllUsesOfValueWith now.

Custom handling now always passes result number 0 to LowerOperation.
This matches what LegalizeDAG does. Since the introduction of
STRICT nodes, I've encountered several issues with X86's custom
handling being called with an SDValue pointing at the chain and
our custom handlers using that to get a VT instead of result 0.
This should prevent us from having any more of those issues. On
return we will update the LegalizedNodes map for all results so
we shouldn't call the custom handler again for each result number.

I want to push SDNode* further into the Expand and Promote
handlers, but I've left that for a follow to keep this patch size
down. I've created a dummy SDValue(Node, 0) to keep the handlers
working.

Differential Revision: https://reviews.llvm.org/D72224
2020-01-10 10:14:58 -08:00

220 lines
6.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=SKX
define double @test1(double %a, double %b) nounwind {
; ALL-LABEL: test1:
; ALL: ## %bb.0:
; ALL-NEXT: vucomisd %xmm1, %xmm0
; ALL-NEXT: jne LBB0_1
; ALL-NEXT: jnp LBB0_2
; ALL-NEXT: LBB0_1: ## %l1
; ALL-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
; ALL-NEXT: LBB0_2: ## %l2
; ALL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
%tobool = fcmp une double %a, %b
br i1 %tobool, label %l1, label %l2
l1:
%c = fsub double %a, %b
ret double %c
l2:
%c1 = fadd double %a, %b
ret double %c1
}
define float @test2(float %a, float %b) nounwind {
; ALL-LABEL: test2:
; ALL: ## %bb.0:
; ALL-NEXT: vucomiss %xmm0, %xmm1
; ALL-NEXT: jbe LBB1_2
; ALL-NEXT: ## %bb.1: ## %l1
; ALL-NEXT: vsubss %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
; ALL-NEXT: LBB1_2: ## %l2
; ALL-NEXT: vaddss %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
%tobool = fcmp olt float %a, %b
br i1 %tobool, label %l1, label %l2
l1:
%c = fsub float %a, %b
ret float %c
l2:
%c1 = fadd float %a, %b
ret float %c1
}
define i32 @test3(float %a, float %b) {
; ALL-LABEL: test3:
; ALL: ## %bb.0:
; ALL-NEXT: vcmpeqss %xmm1, %xmm0, %k0
; ALL-NEXT: kmovw %k0, %eax
; ALL-NEXT: retq
%cmp10.i = fcmp oeq float %a, %b
%conv11.i = zext i1 %cmp10.i to i32
ret i32 %conv11.i
}
define float @test5(float %p) #0 {
; ALL-LABEL: test5:
; ALL: ## %bb.0: ## %entry
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vucomiss %xmm1, %xmm0
; ALL-NEXT: jne LBB3_1
; ALL-NEXT: jp LBB3_1
; ALL-NEXT: ## %bb.2: ## %return
; ALL-NEXT: retq
; ALL-NEXT: LBB3_1: ## %if.end
; ALL-NEXT: vcmpltss %xmm0, %xmm1, %k1
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: vmovss {{.*}}(%rip), %xmm0 {%k1}
; ALL-NEXT: retq
entry:
%cmp = fcmp oeq float %p, 0.000000e+00
br i1 %cmp, label %return, label %if.end
if.end: ; preds = %entry
%cmp1 = fcmp ogt float %p, 0.000000e+00
%cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00
br label %return
return: ; preds = %if.end, %entry
%retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ]
ret float %retval.0
}
define i32 @test6(i32 %a, i32 %b) {
; ALL-LABEL: test6:
; ALL: ## %bb.0:
; ALL-NEXT: xorl %eax, %eax
; ALL-NEXT: cmpl %esi, %edi
; ALL-NEXT: sete %al
; ALL-NEXT: retq
%cmp = icmp eq i32 %a, %b
%res = zext i1 %cmp to i32
ret i32 %res
}
define i32 @test7(double %x, double %y) #2 {
; ALL-LABEL: test7:
; ALL: ## %bb.0: ## %entry
; ALL-NEXT: xorl %eax, %eax
; ALL-NEXT: vucomisd %xmm1, %xmm0
; ALL-NEXT: setne %al
; ALL-NEXT: retq
entry:
%0 = fcmp one double %x, %y
%or = zext i1 %0 to i32
ret i32 %or
}
define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
; ALL-LABEL: test8:
; ALL: ## %bb.0:
; ALL-NEXT: notl %edi
; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000
; ALL-NEXT: testl %edx, %edx
; ALL-NEXT: movl $1, %eax
; ALL-NEXT: cmovel %eax, %edx
; ALL-NEXT: orl %edi, %esi
; ALL-NEXT: cmovnel %edx, %eax
; ALL-NEXT: retq
%tmp1 = icmp eq i32 %a1, -1
%tmp2 = icmp eq i32 %a2, -2147483648
%tmp3 = and i1 %tmp1, %tmp2
%tmp4 = icmp eq i32 %a3, 0
%tmp5 = or i1 %tmp3, %tmp4
%res = select i1 %tmp5, i32 1, i32 %a3
ret i32 %res
}
define i32 @test9(i64 %a) {
; ALL-LABEL: test9:
; ALL: ## %bb.0:
; ALL-NEXT: testb $1, %dil
; ALL-NEXT: jne LBB7_2
; ALL-NEXT: ## %bb.1: ## %A
; ALL-NEXT: movl $6, %eax
; ALL-NEXT: retq
; ALL-NEXT: LBB7_2: ## %B
; ALL-NEXT: movl $7, %eax
; ALL-NEXT: retq
%b = and i64 %a, 1
%cmp10.i = icmp eq i64 %b, 0
br i1 %cmp10.i, label %A, label %B
A:
ret i32 6
B:
ret i32 7
}
define i32 @test10(i64 %b, i64 %c, i1 %d) {
; ALL-LABEL: test10:
; ALL: ## %bb.0:
; ALL-NEXT: movl %edx, %eax
; ALL-NEXT: andb $1, %al
; ALL-NEXT: cmpq %rsi, %rdi
; ALL-NEXT: sete %cl
; ALL-NEXT: orb %dl, %cl
; ALL-NEXT: andb $1, %cl
; ALL-NEXT: cmpb %cl, %al
; ALL-NEXT: je LBB8_1
; ALL-NEXT: ## %bb.2: ## %if.end.i
; ALL-NEXT: movl $6, %eax
; ALL-NEXT: retq
; ALL-NEXT: LBB8_1: ## %if.then.i
; ALL-NEXT: movl $5, %eax
; ALL-NEXT: retq
%cmp8.i = icmp eq i64 %b, %c
%or1 = or i1 %d, %cmp8.i
%xor1 = xor i1 %d, %or1
br i1 %xor1, label %if.end.i, label %if.then.i
if.then.i:
ret i32 5
if.end.i:
ret i32 6
}
; This test previously caused an infinite loop in legalize vector ops. Due to
; CSE triggering on the call to UpdateNodeOperands and the resulting node not
; being passed to LowerOperation. The add is needed to force the zext into a
; sext on that path. The shuffle keeps the zext alive. The xor somehow
; influences the zext to be visited before the sext exposing the CSE opportunity
; for the sext since zext of setcc is custom legalized to a sext and shift.
define <8 x i32> @legalize_loop(<8 x double> %arg) {
; KNL-LABEL: legalize_loop:
; KNL: ## %bb.0:
; KNL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; KNL-NEXT: vcmpnltpd %zmm0, %zmm1, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpsrld $31, %ymm0, %ymm1
; KNL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
; KNL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
; KNL-NEXT: vpsubd %ymm0, %ymm1, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: legalize_loop:
; SKX: ## %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; SKX-NEXT: vcmpnltpd %zmm0, %zmm1, %k0
; SKX-NEXT: vpmovm2d %k0, %ymm0
; SKX-NEXT: vpsrld $31, %ymm0, %ymm1
; SKX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
; SKX-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
; SKX-NEXT: vpsubd %ymm0, %ymm1, %ymm0
; SKX-NEXT: retq
%tmp = fcmp ogt <8 x double> %arg, zeroinitializer
%tmp1 = xor <8 x i1> %tmp, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
%tmp2 = zext <8 x i1> %tmp1 to <8 x i32>
%tmp3 = shufflevector <8 x i32> %tmp2, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
%tmp4 = add <8 x i32> %tmp2, %tmp3
ret <8 x i32> %tmp4
}