
[AVX-512] Add patterns to make fp compare instructions commutable during isel.

llvm-svn: 314598
Craig Topper 2017-09-30 17:02:39 +00:00
parent 3fb32d9745
commit 561ca5cd68
3 changed files with 345 additions and 3 deletions
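
Editor's note: SSE/AVX/AVX-512 FP compare instructions can only fold a load from their second source operand, so when a load feeds the first operand of an order-insensitive predicate (EQ, UNORD, NEQ, ORD) the new patterns match the swapped operand order and still select the memory form. A minimal C++ sketch of the idea, with illustrative names only (the actual change is pure TableGen patterns operating on SelectionDAG nodes):

#include <utility>

// Mirrors the CommutableCMPCC PatLeaf added in X86InstrSSE.td below:
// 0x00 = EQ, 0x03 = UNORD, 0x04 = NEQ, 0x07 = ORD, the predicates whose
// result is independent of operand order.
static bool isCommutableCMPCC(unsigned Imm) {
  return Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07;
}

// Hypothetical node model (not LLVM's API): cmp(load, reg) becomes
// cmp(reg, load) for a commutable predicate, which puts the load in the
// second source operand, the only one the memory instruction forms fold.
struct FPCmp {
  int LHS, RHS;    // stand-ins for the two value operands
  bool LHSIsLoad;  // true when operand 0 comes from memory
  unsigned CC;     // x86 comparison predicate immediate
};

static void commuteForLoadFold(FPCmp &N) {
  if (N.LHSIsLoad && isCommutableCMPCC(N.CC)) {
    std::swap(N.LHS, N.RHS);  // CC is unchanged: these predicates commute
    N.LHSIsLoad = false;
  }
}

The tests at the end of this commit show the effect: vcmpeqps (%rdi), %xmm0 is selected even though the load is the first fcmp operand in the IR.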

lib/Target/X86/X86InstrAVX512.td

@@ -2077,7 +2077,33 @@ multiclass avx512_vcmp_common<X86VectorVTInfo _> {
"$cc, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B;
}
}
}
// Patterns for selecting with loads in other operand.
def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
CommutableCMPCC:$cc),
(!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
imm:$cc)>;
def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
(_.VT _.RC:$src1),
CommutableCMPCC:$cc)),
(!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
imm:$cc)>;
def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
(_.VT _.RC:$src1), CommutableCMPCC:$cc),
(!cast<Instruction>(NAME#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
imm:$cc)>;
def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
(_.ScalarLdFrag addr:$src2)),
(_.VT _.RC:$src1),
CommutableCMPCC:$cc)),
(!cast<Instruction>(NAME#_.ZSuffix#"rmbik") _.KRCWM:$mask,
_.RC:$src1, addr:$src2,
imm:$cc)>;
}
multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
@@ -2119,6 +2145,17 @@ defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
CommutableCMPCC:$cc)),
(VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
CommutableCMPCC:$cc)),
(VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
}
// ----------------------------------------------------------------
// FPClass
//handle fpclass instruction mask = op(reg_scalar,imm)
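
Editor's note: each vector type in the AVX-512 hunk above gets four commuted-load forms: plain load ("rmi"), write-masked ("rmik"), broadcast load ("rmbi"), and masked broadcast ("rmbik"), plus scalar VCMPSS/VCMPSD forms. The masked patterns match the DAG shape (and $mask, (X86cmpm ...)) because a zero-masked AVX-512 compare is simply the AND of the write-mask with the raw compare result; a scalar model of that shape, with hypothetical names:

#include <bitset>

// Illustrative model only: a k-register write-mask applied to a 16-lane
// compare is the bitwise AND that the "rmik"/"rmbik" patterns match as
// (and KRCWM:$mask, (X86cmpm ...)).
std::bitset<16> maskedCompareResult(std::bitset<16> WriteMask,
                                    std::bitset<16> RawCmp) {
  return WriteMask & RawCmp;
}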

lib/Target/X86/X86InstrSSE.td

@@ -2308,6 +2308,58 @@ let Constraints = "$src1 = $dst" in {
SSEPackedDouble, memopv2f64, SSE_ALU_F64P>, PD;
}
def CommutableCMPCC : PatLeaf<(imm), [{
return (N->getZExtValue() == 0x00 || N->getZExtValue() == 0x03 ||
N->getZExtValue() == 0x04 || N->getZExtValue() == 0x07);
}]>;
// Patterns to select compares with loads in first operand.
let Predicates = [HasAVX] in {
def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1,
CommutableCMPCC:$cc)),
(VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
def : Pat<(v8f32 (X86cmpp (loadv8f32 addr:$src2), VR256:$src1,
CommutableCMPCC:$cc)),
(VCMPPSYrmi VR256:$src1, addr:$src2, imm:$cc)>;
def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1,
CommutableCMPCC:$cc)),
(VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1,
CommutableCMPCC:$cc)),
(VCMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
CommutableCMPCC:$cc)),
(VCMPSDrm FR64:$src1, addr:$src2, imm:$cc)>;
def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
CommutableCMPCC:$cc)),
(VCMPSSrm FR32:$src1, addr:$src2, imm:$cc)>;
}
let Predicates = [UseSSE2] in {
def : Pat<(v2f64 (X86cmpp (memopv2f64 addr:$src2), VR128:$src1,
CommutableCMPCC:$cc)),
(CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1,
CommutableCMPCC:$cc)),
(CMPSDrm FR64:$src1, addr:$src2, imm:$cc)>;
}
let Predicates = [UseSSE1] in {
def : Pat<(v4f32 (X86cmpp (memopv4f32 addr:$src2), VR128:$src1,
CommutableCMPCC:$cc)),
(CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>;
def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1,
CommutableCMPCC:$cc)),
(CMPSSrm FR32:$src1, addr:$src2, imm:$cc)>;
}
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//
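
Editor's note: the test diff below adds -disable-peephole to the RUN lines so the folds being checked come from the new isel patterns rather than the later peephole load-folding pass, and adds an AVX512 run. In the AVX512 checks, the compare writes a mask register, and the sext <N x i1> from the IR is materialized by zero-masking an all-ones vector (vpcmpeqd %xmm0, %xmm0, %xmm0 produces all ones; vmovdqa32 %xmm0, %xmm0 {%k1} {z} keeps -1 in selected lanes, 0 elsewhere). A scalar sketch of that recurring sequence:

#include <array>
#include <cstddef>
#include <cstdint>

// Scalar model of the AVX512 check sequence, not generated code: turn a
// 4-bit compare mask into the <4 x i32> sign-extended vector result.
std::array<std::int32_t, 4> sextOfMask(const std::array<bool, 4> &K) {
  std::array<std::int32_t, 4> R{};
  for (std::size_t i = 0; i < R.size(); ++i)
    R[i] = K[i] ? -1 : 0;  // all-ones lane where the mask bit is set
  return R;
}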

test/CodeGen/X86/commute-fcmp.ll

@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 -disable-peephole | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512vl -disable-peephole | FileCheck %s --check-prefix=AVX512
;
; Float Comparisons
@@ -17,6 +18,13 @@ define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_eq:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpeqps (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp oeq <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -33,6 +41,13 @@ define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpneqps (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_ne:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpneqps (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp une <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -49,6 +64,13 @@ define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpordps (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_ord:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpordps (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp ord <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -65,6 +87,13 @@ define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpunordps (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_uno:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpunordps (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp uno <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -88,6 +117,16 @@ define <4 x i32> @commute_cmpps_ueq(<4 x float>* %a0, <4 x float> %a1) {
; AVX-NEXT: vcmpunordps %xmm0, %xmm1, %xmm0
; AVX-NEXT: vorps %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_ueq:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %xmm1
; AVX512-NEXT: vcmpeqps %xmm0, %xmm1, %k0
; AVX512-NEXT: vcmpunordps %xmm0, %xmm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp ueq <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -111,6 +150,15 @@ define <4 x i32> @commute_cmpps_one(<4 x float>* %a0, <4 x float> %a1) {
; AVX-NEXT: vcmpordps %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_one:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %xmm1
; AVX512-NEXT: vcmpordps %xmm0, %xmm1, %k1
; AVX512-NEXT: vcmpneqps %xmm0, %xmm1, %k1 {%k1}
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp one <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -130,6 +178,14 @@ define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) {
; AVX-NEXT: vmovaps (%rdi), %xmm1
; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_lt:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %xmm1
; AVX512-NEXT: vcmpltps %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp olt <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -149,6 +205,14 @@ define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) {
; AVX-NEXT: vmovaps (%rdi), %xmm1
; AVX-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_le:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %xmm1
; AVX512-NEXT: vcmpleps %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %a0
%2 = fcmp ole <4 x float> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -166,6 +230,13 @@ define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_eq_ymm:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpeqps (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp oeq <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -183,6 +254,13 @@ define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpneqps (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_ne_ymm:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpneqps (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp une <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -200,6 +278,13 @@ define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpordps (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_ord_ymm:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpordps (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp ord <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -217,6 +302,13 @@ define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpunordps (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_uno_ymm:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpunordps (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp uno <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -245,6 +337,16 @@ define <8 x i32> @commute_cmpps_ueq_ymm(<8 x float>* %a0, <8 x float> %a1) {
; AVX-NEXT: vcmpunordps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vorps %ymm2, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_ueq_ymm:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %ymm1
; AVX512-NEXT: vcmpeqps %ymm0, %ymm1, %k0
; AVX512-NEXT: vcmpunordps %ymm0, %ymm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp ueq <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -273,6 +375,15 @@ define <8 x i32> @commute_cmpps_one_ymm(<8 x float>* %a0, <8 x float> %a1) {
; AVX-NEXT: vcmpordps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_one_ymm:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %ymm1
; AVX512-NEXT: vcmpordps %ymm0, %ymm1, %k1
; AVX512-NEXT: vcmpneqps %ymm0, %ymm1, %k1 {%k1}
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp one <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -295,6 +406,14 @@ define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) {
; AVX-NEXT: vmovaps (%rdi), %ymm1
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_lt_ymm:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %ymm1
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp olt <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -317,6 +436,14 @@ define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) {
; AVX-NEXT: vmovaps (%rdi), %ymm1
; AVX-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmpps_le_ymm:
; AVX512: # BB#0:
; AVX512-NEXT: vmovaps (%rdi), %ymm1
; AVX512-NEXT: vcmpleps %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %a0
%2 = fcmp ole <8 x float> %1, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
@@ -338,6 +465,13 @@ define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_eq:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpeqpd (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp oeq <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -354,6 +488,13 @@ define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpneqpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_ne:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpneqpd (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp une <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -370,6 +511,13 @@ define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_ord:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpordpd (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp ord <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -393,6 +541,16 @@ define <2 x i64> @commute_cmppd_ueq(<2 x double>* %a0, <2 x double> %a1) {
; AVX-NEXT: vcmpunordpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vorpd %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_ueq:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %xmm1
; AVX512-NEXT: vcmpeqpd %xmm0, %xmm1, %k0
; AVX512-NEXT: vcmpunordpd %xmm0, %xmm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp ueq <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -416,6 +574,15 @@ define <2 x i64> @commute_cmppd_one(<2 x double>* %a0, <2 x double> %a1) {
; AVX-NEXT: vcmpordpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_one:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %xmm1
; AVX512-NEXT: vcmpordpd %xmm0, %xmm1, %k1
; AVX512-NEXT: vcmpneqpd %xmm0, %xmm1, %k1 {%k1}
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp one <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -432,6 +599,13 @@ define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpunordpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_uno:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpunordpd (%rdi), %xmm0, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp uno <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -451,6 +625,14 @@ define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) {
; AVX-NEXT: vmovapd (%rdi), %xmm1
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_lt:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %xmm1
; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp olt <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -470,6 +652,14 @@ define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) {
; AVX-NEXT: vmovapd (%rdi), %xmm1
; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_le:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %xmm1
; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %a0
%2 = fcmp ole <2 x double> %1, %a1
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -487,6 +677,13 @@ define <4 x i64> @commute_cmppd_eq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_eq_ymmm:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpeqpd (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp oeq <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -504,6 +701,13 @@ define <4 x i64> @commute_cmppd_ne_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpneqpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_ne_ymmm:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpneqpd (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp une <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -521,6 +725,13 @@ define <4 x i64> @commute_cmppd_ord_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_ord_ymmm:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpordpd (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp ord <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -538,6 +749,13 @@ define <4 x i64> @commute_cmppd_uno_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; AVX: # BB#0:
; AVX-NEXT: vcmpunordpd (%rdi), %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_uno_ymmm:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpunordpd (%rdi), %ymm0, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp uno <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -566,6 +784,16 @@ define <4 x i64> @commute_cmppd_ueq_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; AVX-NEXT: vcmpunordpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vorpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_ueq_ymmm:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %ymm1
; AVX512-NEXT: vcmpeqpd %ymm0, %ymm1, %k0
; AVX512-NEXT: vcmpunordpd %ymm0, %ymm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp ueq <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -594,6 +822,15 @@ define <4 x i64> @commute_cmppd_one_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; AVX-NEXT: vcmpordpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vandpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_one_ymmm:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %ymm1
; AVX512-NEXT: vcmpordpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vcmpneqpd %ymm0, %ymm1, %k1 {%k1}
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp one <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -616,6 +853,14 @@ define <4 x i64> @commute_cmppd_lt_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; AVX-NEXT: vmovapd (%rdi), %ymm1
; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_lt_ymmm:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %ymm1
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp olt <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
@@ -638,6 +883,14 @@ define <4 x i64> @commute_cmppd_le_ymmm(<4 x double>* %a0, <4 x double> %a1) {
; AVX-NEXT: vmovapd (%rdi), %ymm1
; AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
; AVX-NEXT: retq
;
; AVX512-LABEL: commute_cmppd_le_ymmm:
; AVX512: # BB#0:
; AVX512-NEXT: vmovapd (%rdi), %ymm1
; AVX512-NEXT: vcmplepd %ymm0, %ymm1, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %a0
%2 = fcmp ole <4 x double> %1, %a1
%3 = sext <4 x i1> %2 to <4 x i64>