mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
fe9ffa8ba3
This system wasn't very well designed for multi-result nodes. As a consequence, they weren't consistently registered in the LegalizedNodes map, leading to nodes being revisited for different results. I've removed the "Result" variable from the main LegalizeOp method and used an SDNode* instead. The result number from the incoming Op SDValue is only used for deciding which result to return to the caller. When LegalizeOp is called it should always register a legalized result for all of its results. Future calls for any other result should be pulled from the LegalizedNodes map. Legal nodes will now register all of their results in the map instead of just the one we were called for. The Expand and Promote handling now uses a vector of results, similar to LegalizeDAG. Each of the new results is then re-legalized and logged in the LegalizedNodes map for all of the Results for the node being legalized. None of the handlers register their own results now. And none call ReplaceAllUsesOfValueWith now. Custom handling now always passes result number 0 to LowerOperation. This matches what LegalizeDAG does. Since the introduction of STRICT nodes, I've encountered several issues with X86's custom handling being called with an SDValue pointing at the chain and our custom handlers using that to get a VT instead of result 0. This should prevent us from having any more of those issues. On return we will update the LegalizedNodes map for all results so we shouldn't call the custom handler again for each result number. I want to push SDNode* further into the Expand and Promote handlers, but I've left that for a follow-up to keep this patch size down. I've created a dummy SDValue(Node, 0) to keep the handlers working. Differential Revision: https://reviews.llvm.org/D72224
220 lines
6.3 KiB
LLVM
220 lines
6.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
|
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=SKX
|
|
|
|
; test1 - scalar double 'fcmp une' used directly as a branch condition.
; The shared checks expect a single vucomisd followed by a jne/jnp pair:
; the not-equal case jumps to %l1, the ordered-and-equal case jumps to %l2,
; and the remaining (parity set) case falls through into %l1.
; %l1 returns a - b (vsubsd); %l2 returns a + b (vaddsd).
define double @test1(double %a, double %b) nounwind {
|
|
; ALL-LABEL: test1:
|
|
; ALL: ## %bb.0:
|
|
; ALL-NEXT: vucomisd %xmm1, %xmm0
|
|
; ALL-NEXT: jne LBB0_1
|
|
; ALL-NEXT: jnp LBB0_2
|
|
; ALL-NEXT: LBB0_1: ## %l1
|
|
; ALL-NEXT: vsubsd %xmm1, %xmm0, %xmm0
|
|
; ALL-NEXT: retq
|
|
; ALL-NEXT: LBB0_2: ## %l2
|
|
; ALL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
|
|
; ALL-NEXT: retq
|
|
; 'une' is true when the operands are unordered or not equal.
%tobool = fcmp une double %a, %b
|
|
br i1 %tobool, label %l1, label %l2
|
|
|
|
l1:
|
|
%c = fsub double %a, %b
|
|
ret double %c
|
|
l2:
|
|
%c1 = fadd double %a, %b
|
|
ret double %c1
|
|
}
|
|
|
|
; test2 - scalar float 'fcmp olt' used directly as a branch condition.
; The shared checks expect one vucomiss with %xmm0 as the first (AT&T source)
; operand and a single jbe that skips to %l2 when the condition is false.
; %l1 returns a - b (vsubss); %l2 returns a + b (vaddss).
define float @test2(float %a, float %b) nounwind {
|
|
; ALL-LABEL: test2:
|
|
; ALL: ## %bb.0:
|
|
; ALL-NEXT: vucomiss %xmm0, %xmm1
|
|
; ALL-NEXT: jbe LBB1_2
|
|
; ALL-NEXT: ## %bb.1: ## %l1
|
|
; ALL-NEXT: vsubss %xmm1, %xmm0, %xmm0
|
|
; ALL-NEXT: retq
|
|
; ALL-NEXT: LBB1_2: ## %l2
|
|
; ALL-NEXT: vaddss %xmm1, %xmm0, %xmm0
|
|
; ALL-NEXT: retq
|
|
; 'olt' is true only when the operands are ordered and a < b.
%tobool = fcmp olt float %a, %b
|
|
br i1 %tobool, label %l1, label %l2
|
|
|
|
l1:
|
|
%c = fsub float %a, %b
|
|
ret float %c
|
|
l2:
|
|
%c1 = fadd float %a, %b
|
|
ret float %c1
|
|
}
|
|
|
|
; test3 - scalar float 'fcmp oeq' whose i1 result is zero-extended to i32.
; The shared checks expect the compare to be done straight into a mask
; register (vcmpeqss ... %k0) and then moved to %eax with kmovw, followed
; immediately by the return.
define i32 @test3(float %a, float %b) {
|
|
; ALL-LABEL: test3:
|
|
; ALL: ## %bb.0:
|
|
; ALL-NEXT: vcmpeqss %xmm1, %xmm0, %k0
|
|
; ALL-NEXT: kmovw %k0, %eax
|
|
; ALL-NEXT: retq
|
|
|
|
%cmp10.i = fcmp oeq float %a, %b
|
|
%conv11.i = zext i1 %cmp10.i to i32
|
|
ret i32 %conv11.i
|
|
}
|
|
|
|
; test5 - branch on 'fcmp oeq %p, 0.0', then a select driven by 'fcmp ogt'.
; If %p compares equal to zero it is returned unchanged; otherwise the result
; is 1.0 when %p > 0.0 and -1.0 when it is not.
; The shared checks expect:
;   - vxorps to materialize zero, then vucomiss with jne/jp both targeting
;     %if.end (the oeq test falls through to the early return);
;   - in %if.end, vcmpltss writing mask %k1, an unmasked vmovss load, and a
;     second vmovss load predicated on %k1 to implement the select.
; NOTE(review): attribute group #0 is referenced but not defined in the
; visible part of this file - confirm that is intentional.
define float @test5(float %p) #0 {
|
|
; ALL-LABEL: test5:
|
|
; ALL: ## %bb.0: ## %entry
|
|
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
|
; ALL-NEXT: vucomiss %xmm1, %xmm0
|
|
; ALL-NEXT: jne LBB3_1
|
|
; ALL-NEXT: jp LBB3_1
|
|
; ALL-NEXT: ## %bb.2: ## %return
|
|
; ALL-NEXT: retq
|
|
; ALL-NEXT: LBB3_1: ## %if.end
|
|
; ALL-NEXT: vcmpltss %xmm0, %xmm1, %k1
|
|
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
|
; ALL-NEXT: vmovss {{.*}}(%rip), %xmm0 {%k1}
|
|
; ALL-NEXT: retq
|
|
entry:
|
|
%cmp = fcmp oeq float %p, 0.000000e+00
|
|
br i1 %cmp, label %return, label %if.end
|
|
|
|
if.end: ; preds = %entry
|
|
%cmp1 = fcmp ogt float %p, 0.000000e+00
|
|
%cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00
|
|
br label %return
|
|
|
|
return: ; preds = %if.end, %entry
|
|
%retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ]
|
|
ret float %retval.0
|
|
}
|
|
|
|
; test6 - integer 'icmp eq' on i32 whose i1 result is zero-extended to i32.
; The shared checks expect plain scalar code with no mask registers:
; xorl to clear %eax, cmpl on the two argument registers, then sete %al.
define i32 @test6(i32 %a, i32 %b) {
|
|
; ALL-LABEL: test6:
|
|
; ALL: ## %bb.0:
|
|
; ALL-NEXT: xorl %eax, %eax
|
|
; ALL-NEXT: cmpl %esi, %edi
|
|
; ALL-NEXT: sete %al
|
|
; ALL-NEXT: retq
|
|
%cmp = icmp eq i32 %a, %b
|
|
%res = zext i1 %cmp to i32
|
|
ret i32 %res
|
|
}
|
|
|
|
; test7 - scalar double 'fcmp one' whose i1 result is zero-extended to i32.
; The shared checks expect xorl to clear %eax, one vucomisd, and a single
; setne %al (no additional parity check is expected).
; NOTE(review): attribute group #2 is referenced but not defined in the
; visible part of this file - confirm that is intentional.
define i32 @test7(double %x, double %y) #2 {
|
|
; ALL-LABEL: test7:
|
|
; ALL: ## %bb.0: ## %entry
|
|
; ALL-NEXT: xorl %eax, %eax
|
|
; ALL-NEXT: vucomisd %xmm1, %xmm0
|
|
; ALL-NEXT: setne %al
|
|
; ALL-NEXT: retq
|
|
entry:
|
|
; 'one' is true only when the operands are ordered and not equal.
%0 = fcmp one double %x, %y
|
|
%or = zext i1 %0 to i32
|
|
ret i32 %or
|
|
}
|
|
|
|
; test8 - combines three integer equality tests and selects on the result:
;   ((a1 == -1) & (a2 == -2147483648)) | (a3 == 0) ? 1 : a3
; The shared checks expect branch-free code: notl and xorl $0x80000000 turn
; the first two equality tests into zero tests, orl merges them, testl checks
; %a3 against zero, and two conditional moves (cmovel / cmovnel) pick between
; the constant 1 and %a3.
define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
|
|
; ALL-LABEL: test8:
|
|
; ALL: ## %bb.0:
|
|
; ALL-NEXT: notl %edi
|
|
; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000
|
|
; ALL-NEXT: testl %edx, %edx
|
|
; ALL-NEXT: movl $1, %eax
|
|
; ALL-NEXT: cmovel %eax, %edx
|
|
; ALL-NEXT: orl %edi, %esi
|
|
; ALL-NEXT: cmovnel %edx, %eax
|
|
; ALL-NEXT: retq
|
|
%tmp1 = icmp eq i32 %a1, -1
|
|
%tmp2 = icmp eq i32 %a2, -2147483648
|
|
%tmp3 = and i1 %tmp1, %tmp2
|
|
%tmp4 = icmp eq i32 %a3, 0
|
|
%tmp5 = or i1 %tmp3, %tmp4
|
|
%res = select i1 %tmp5, i32 1, i32 %a3
|
|
ret i32 %res
|
|
}
|
|
|
|
; test9 - branches on bit 0 of an i64 argument.
; The 'and 1' + 'icmp eq 0' pair is expected to become a single
; 'testb $1, %dil' followed by jne: block %A (bit clear) returns 6 and
; block %B (bit set) returns 7.
define i32 @test9(i64 %a) {
|
|
; ALL-LABEL: test9:
|
|
; ALL: ## %bb.0:
|
|
; ALL-NEXT: testb $1, %dil
|
|
; ALL-NEXT: jne LBB7_2
|
|
; ALL-NEXT: ## %bb.1: ## %A
|
|
; ALL-NEXT: movl $6, %eax
|
|
; ALL-NEXT: retq
|
|
; ALL-NEXT: LBB7_2: ## %B
|
|
; ALL-NEXT: movl $7, %eax
|
|
; ALL-NEXT: retq
|
|
%b = and i64 %a, 1
|
|
%cmp10.i = icmp eq i64 %b, 0
|
|
br i1 %cmp10.i, label %A, label %B
|
|
A:
|
|
ret i32 6
|
|
B:
|
|
ret i32 7
|
|
}
|
|
|
|
; test10 - i1 logic feeding a branch: xor(%d, or(%d, %b == %c)).
; The shared checks expect the boolean math to be done in byte registers:
; %d is copied and masked with andb $1, the i64 compare result comes from
; cmpq + sete, orb/andb form the 'or', and cmpb + je implements the 'xor'
; branch. %if.then.i returns 5 and %if.end.i returns 6.
define i32 @test10(i64 %b, i64 %c, i1 %d) {
|
|
; ALL-LABEL: test10:
|
|
; ALL: ## %bb.0:
|
|
; ALL-NEXT: movl %edx, %eax
|
|
; ALL-NEXT: andb $1, %al
|
|
; ALL-NEXT: cmpq %rsi, %rdi
|
|
; ALL-NEXT: sete %cl
|
|
; ALL-NEXT: orb %dl, %cl
|
|
; ALL-NEXT: andb $1, %cl
|
|
; ALL-NEXT: cmpb %cl, %al
|
|
; ALL-NEXT: je LBB8_1
|
|
; ALL-NEXT: ## %bb.2: ## %if.end.i
|
|
; ALL-NEXT: movl $6, %eax
|
|
; ALL-NEXT: retq
|
|
; ALL-NEXT: LBB8_1: ## %if.then.i
|
|
; ALL-NEXT: movl $5, %eax
|
|
; ALL-NEXT: retq
|
|
|
|
%cmp8.i = icmp eq i64 %b, %c
|
|
%or1 = or i1 %d, %cmp8.i
|
|
%xor1 = xor i1 %d, %or1
|
|
br i1 %xor1, label %if.end.i, label %if.then.i
|
|
|
|
if.then.i:
|
|
ret i32 5
|
|
|
|
if.end.i:
|
|
ret i32 6
|
|
}
|
|
|
|
; This test previously caused an infinite loop in legalize vector ops, due to
|
|
; CSE triggering on the call to UpdateNodeOperands and the resulting node not
|
|
; being passed to LowerOperation. The add is needed to force the zext into a
|
|
; sext on that path. The shuffle keeps the zext alive. The xor somehow
|
|
; influences the zext to be visited before the sext exposing the CSE opportunity
|
|
; for the sext since zext of setcc is custom legalized to a sext and shift.
|
|
; legalize_loop - vector regression test: 'fcmp ogt' on <8 x double> against
; zero, inverted with an all-true xor, zero-extended to <8 x i32>, and added
; to an element-reversed shuffle of itself.
; Expected output per prefix:
;   - both prefixes compare with vcmpnltpd against a vxorpd-zeroed register,
;     i.e. the xor does not appear as a separate instruction;
;   - KNL turns the mask into a vector with a zero-masked vpternlogd $255
;     under %k1, while SKX uses vpmovm2d from %k0;
;   - the remainder is identical: vpsrld $31 produces the 0/1 values,
;     vpshufd + vpermq perform the reversal, and the final add is emitted as
;     a vpsubd of the mask vector.
define <8 x i32> @legalize_loop(<8 x double> %arg) {
|
|
; KNL-LABEL: legalize_loop:
|
|
; KNL: ## %bb.0:
|
|
; KNL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
|
; KNL-NEXT: vcmpnltpd %zmm0, %zmm1, %k1
|
|
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
|
; KNL-NEXT: vpsrld $31, %ymm0, %ymm1
|
|
; KNL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
|
|
; KNL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
|
|
; KNL-NEXT: vpsubd %ymm0, %ymm1, %ymm0
|
|
; KNL-NEXT: retq
|
|
;
|
|
; SKX-LABEL: legalize_loop:
|
|
; SKX: ## %bb.0:
|
|
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
|
; SKX-NEXT: vcmpnltpd %zmm0, %zmm1, %k0
|
|
; SKX-NEXT: vpmovm2d %k0, %ymm0
|
|
; SKX-NEXT: vpsrld $31, %ymm0, %ymm1
|
|
; SKX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[3,2,1,0,7,6,5,4]
|
|
; SKX-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
|
|
; SKX-NEXT: vpsubd %ymm0, %ymm1, %ymm0
|
|
; SKX-NEXT: retq
|
|
%tmp = fcmp ogt <8 x double> %arg, zeroinitializer
|
|
%tmp1 = xor <8 x i1> %tmp, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
|
|
%tmp2 = zext <8 x i1> %tmp1 to <8 x i32>
|
|
%tmp3 = shufflevector <8 x i32> %tmp2, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
|
|
%tmp4 = add <8 x i32> %tmp2, %tmp3
|
|
ret <8 x i32> %tmp4
|
|
}
|