[X86][SSE][AVX-512] Convert FAND/FOR/FXOR/FANDN nodes to integer operations when integer vector types are available. This will allow a bunch of patterns to be removed.

These nodes are only emitted for lowering FABS/FNEG/FNABS/FCOPYSIGN. Ideally we just wouldn't create these nodes if SSE2 or higher is available, but it was simpler to just convert them in DAG combine. For SSE2, AVX, and AVX512 with DQI there is no functional change, as the execution domain fixing pass ensures the right domain is selected regardless of the ISD opcode. For AVX-512 without DQI we end up using integer instructions, since the floating point versions aren't available. But we were already doing that for any logical operations in code that didn't come from FABS/FNEG/FNABS/FCOPYSIGN, so this seems no worse. We also get the benefit of being able to fold broadcasts now. llvm-svn: 290060
2025-02-01 05:01:59 +01:00 · 2016-12-18 07:54:23 +00:00 · 2016-12-18 07:54:23 +00:00 · 207263b7e6
commit 207263b7e6
parent a68fa9215c
5 changed files with 72 additions and 58 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -31905,23 +31905,24 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,

 static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget) {
-  EVT VT = N->getValueType(0);
-  if (VT.is512BitVector() && !Subtarget.hasDQI()) {
-    // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extension.
-    // These logic operations may be executed in the integer domain.
+  MVT VT = N->getSimpleValueType(0);
+  // If we have integer vector types available, use the integer opcodes.
+  if (VT.isVector() && Subtarget.hasSSE2()) {
    SDLoc dl(N);

-    SDValue Op0 = DAG.getBitcast(MVT::v8i64, N->getOperand(0));
-    SDValue Op1 = DAG.getBitcast(MVT::v8i64, N->getOperand(1));
-    unsigned IntOpcode = 0;
+    MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
+
+    SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
+    SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
+    unsigned IntOpcode;
    switch (N->getOpcode()) {
-      default: llvm_unreachable("Unexpected FP logic op");
-      case X86ISD::FOR: IntOpcode = ISD::OR; break;
-      case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
-      case X86ISD::FAND: IntOpcode = ISD::AND; break;
-      case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
+    default: llvm_unreachable("Unexpected FP logic op");
+    case X86ISD::FOR: IntOpcode = ISD::OR; break;
+    case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+    case X86ISD::FAND: IntOpcode = ISD::AND; break;
+    case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
    }
-    SDValue IntOp = DAG.getNode(IntOpcode, dl, MVT::v8i64, Op0, Op1);
+    SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
    return DAG.getBitcast(VT, IntOp);
  }
  return SDValue();
--- a/test/CodeGen/X86/avx512-arith.ll
+++ b/test/CodeGen/X86/avx512-arith.ll
@ -1011,8 +1011,7 @@ define <8 x float>  @test_fxor_8f32(<8 x float> %a) {
 ;
 ; AVX512VL-LABEL: test_fxor_8f32:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
-; AVX512VL-NEXT:    vxorps %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpxord {{.*}}(%rip){1to8}, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_fxor_8f32:
--- a/test/CodeGen/X86/pr13577.ll
+++ b/test/CodeGen/X86/pr13577.ll
@ -30,10 +30,9 @@ declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone
 define float @pr26070() {
 ; CHECK-LABEL: pr26070:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    andps {{.*}}(%rip), %xmm1
 ; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; CHECK-NEXT:    orps %xmm1, %xmm0
+; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
 ;
  %c = call float @copysignf(float 1.0, float undef) readnone
--- a/test/CodeGen/X86/vec-copysign-avx512.ll
+++ b/test/CodeGen/X86/vec-copysign-avx512.ll
@ -5,11 +5,9 @@
 define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind {
 ; AVX512VL-LABEL: v4f32:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
-; AVX512VL-NEXT:    vandps %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
-; AVX512VL-NEXT:    vandps %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT:    vporq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512VLDQ-LABEL: v4f32:
@ -25,11 +23,9 @@ define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind {
 define <8 x float> @v8f32(<8 x float> %a, <8 x float> %b) nounwind {
 ; AVX512VL-LABEL: v8f32:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX512VL-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm1, %ymm1
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; AVX512VL-NEXT:    vporq %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512VLDQ-LABEL: v8f32:
@ -61,12 +57,19 @@ define <16 x float> @v16f32(<16 x float> %a, <16 x float> %b) nounwind {
 }

 define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind {
-; CHECK-LABEL: v2f64:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vandps {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; AVX512VL-LABEL: v2f64:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT:    vporq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
+; AVX512VLDQ-LABEL: v2f64:
+; AVX512VLDQ:       ## BB#0:
+; AVX512VLDQ-NEXT:    vandps {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VLDQ-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    retq
  %tmp = tail call <2 x double> @llvm.copysign.v2f64( <2 x double> %a, <2 x double> %b )
  ret <2 x double> %tmp
 }
@ -74,11 +77,9 @@ define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind {
 define <4 x double> @v4f64(<4 x double> %a, <4 x double> %b) nounwind {
 ; AVX512VL-LABEL: v4f64:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX512VL-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm1, %ymm1
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT:    vporq %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512VLDQ-LABEL: v4f64:
--- a/test/CodeGen/X86/vec_fabs.ll
+++ b/test/CodeGen/X86/vec_fabs.ll
@ -10,15 +10,35 @@
 ; 2013.

 define <2 x double> @fabs_v2f64(<2 x double> %p) {
-; X32-LABEL: fabs_v2f64:
-; X32:       # BB#0:
-; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT:    retl
+; X32_AVX-LABEL: fabs_v2f64:
+; X32_AVX:       # BB#0:
+; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX-NEXT:    retl
 ;
-; X64-LABEL: fabs_v2f64:
-; X64:       # BB#0:
-; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT:    retq
+; X32_AVX512VL-LABEL: fabs_v2f64:
+; X32_AVX512VL:       # BB#0:
+; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX512VL-NEXT:    retl
+;
+; X32_AVX512VLDQ-LABEL: fabs_v2f64:
+; X32_AVX512VLDQ:       # BB#0:
+; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX512VLDQ-NEXT:    retl
+;
+; X64_AVX-LABEL: fabs_v2f64:
+; X64_AVX:       # BB#0:
+; X64_AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX-NEXT:    retq
+;
+; X64_AVX512VL-LABEL: fabs_v2f64:
+; X64_AVX512VL:       # BB#0:
+; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX512VL-NEXT:    retq
+;
+; X64_AVX512VLDQ-LABEL: fabs_v2f64:
+; X64_AVX512VLDQ:       # BB#0:
+; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX512VLDQ-NEXT:    retq
  %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
  ret <2 x double> %t
 }
@ -32,8 +52,7 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) {
 ;
 ; X32_AVX512VL-LABEL: fabs_v4f32:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastss {{\.LCPI.*}}, %xmm1
-; X32_AVX512VL-NEXT:    vandps %xmm1, %xmm0, %xmm0
+; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v4f32:
@ -48,8 +67,7 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) {
 ;
 ; X64_AVX512VL-LABEL: fabs_v4f32:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
-; X64_AVX512VL-NEXT:    vandps %xmm1, %xmm0, %xmm0
+; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v4f32:
@ -69,8 +87,7 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) {
 ;
 ; X32_AVX512VL-LABEL: fabs_v4f64:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastsd {{\.LCPI.*}}, %ymm1
-; X32_AVX512VL-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v4f64:
@ -85,8 +102,7 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) {
 ;
 ; X64_AVX512VL-LABEL: fabs_v4f64:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm1
-; X64_AVX512VL-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v4f64:
@ -106,8 +122,7 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) {
 ;
 ; X32_AVX512VL-LABEL: fabs_v8f32:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastss {{\.LCPI.*}}, %ymm1
-; X32_AVX512VL-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v8f32:
@ -122,8 +137,7 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) {
 ;
 ; X64_AVX512VL-LABEL: fabs_v8f32:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
-; X64_AVX512VL-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v8f32: