diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1a8592e6264..107af0ca3c9 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -841,6 +841,49 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
       CurDAG->DeleteNode(N);
       continue;
     }
+    case X86ISD::FANDN:
+    case X86ISD::FAND:
+    case X86ISD::FOR:
+    case X86ISD::FXOR: {
+      // Widen scalar fp logic ops to vector to reduce isel patterns.
+      // FIXME: Can we do this during lowering/combine.
+      MVT VT = N->getSimpleValueType(0);
+      if (VT.isVector() || VT == MVT::f128)
+        break;
+
+      MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32;
+      SDLoc dl(N);
+      SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
+                                    N->getOperand(0));
+      SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
+                                    N->getOperand(1));
+
+      SDValue Res;
+      if (Subtarget->hasSSE2()) {
+        EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger();
+        Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0);
+        Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1);
+        unsigned Opc;
+        switch (N->getOpcode()) {
+        default: llvm_unreachable("Unexpected opcode!");
+        case X86ISD::FANDN: Opc = X86ISD::ANDNP; break;
+        case X86ISD::FAND: Opc = ISD::AND; break;
+        case X86ISD::FOR: Opc = ISD::OR; break;
+        case X86ISD::FXOR: Opc = ISD::XOR; break;
+        }
+        Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1);
+        Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res);
+      } else {
+        Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1);
+      }
+      Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res,
+                            CurDAG->getIntPtrConstant(0, dl));
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
+      ++I;
+      CurDAG->DeleteNode(N);
+      continue;
+    }
     }
 
     if (OptLevel != CodeGenOpt::None &&
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 4ca4fb7b122..fbadd80b242 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -5657,51 +5657,6 @@ defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
 defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
                                SchedWriteFLogicSizes, 1>;
 
-let Predicates = [HasVLX,HasDQI] in {
-  // Use packed logical operations for scalar ops.
-  def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VANDPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
-                                  (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
-             FR64X)>;
-  def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
-                                 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
-             FR64X)>;
-  def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VXORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
-                                  (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
-             FR64X)>;
-  def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VANDNPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
-                                   (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
-             FR64X)>;
-
-  def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VANDPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
-                                  (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
-             FR32X)>;
-  def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
-                                 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
-             FR32X)>;
-  def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VXORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
-                                  (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
-             FR32X)>;
-  def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VANDNPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
-                                   (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
-             FR32X)>;
-}
-
 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo _> {
   let ExeDomain = _.ExeDomain in {
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 27bcc2dc901..ade5645d5bf 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2417,99 +2417,6 @@ let Predicates = [HasAVX1Only] in {
             (VANDNPSYrm VR256:$src1, addr:$src2)>;
 }
 
-let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
-  // Use packed logical operations for scalar ops.
-  def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VANDPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                              (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-  def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                             (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-  def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VXORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                              (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-  def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (VANDNPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                               (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-
-  def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VANDPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                              (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-  def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                             (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-  def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VXORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                              (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-  def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (VANDNPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                               (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-}
-
-let Predicates = [UseSSE1] in {
-  // Use packed logical operations for scalar ops.
-  def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (ANDPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                             (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-  def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (ORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                            (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-  def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (XORPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                             (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-  def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
-            (COPY_TO_REGCLASS
-             (v4f32 (ANDNPSrr (v4f32 (COPY_TO_REGCLASS FR32:$src1, VR128)),
-                              (v4f32 (COPY_TO_REGCLASS FR32:$src2, VR128)))),
-             FR32)>;
-}
-
-let Predicates = [UseSSE2] in {
-  // Use packed logical operations for scalar ops.
-  def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (ANDPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                             (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-  def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (ORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                            (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-  def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (XORPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                             (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-  def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
-            (COPY_TO_REGCLASS
-             (v2f64 (ANDNPDrr (v2f64 (COPY_TO_REGCLASS FR64:$src1, VR128)),
-                              (v2f64 (COPY_TO_REGCLASS FR64:$src2, VR128)))),
-             FR64)>;
-}
-
 let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
             (VPANDrr VR128:$src1, VR128:$src2)>;
diff --git a/test/CodeGen/X86/scalar-fp-to-i64.ll b/test/CodeGen/X86/scalar-fp-to-i64.ll
index 7576aa3eb9c..7dc44f182cb 100644
--- a/test/CodeGen/X86/scalar-fp-to-i64.ll
+++ b/test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -631,7 +631,7 @@ define i64 @d_to_u64(double %a) nounwind {
 ; SSE3_32_WIN-NEXT:    subsd %xmm1, %xmm0
 ; SSE3_32_WIN-NEXT:    andnpd %xmm0, %xmm3
 ; SSE3_32_WIN-NEXT:    orpd %xmm3, %xmm2
-; SSE3_32_WIN-NEXT:    movsd %xmm2, (%esp)
+; SSE3_32_WIN-NEXT:    movlpd %xmm2, (%esp)
 ; SSE3_32_WIN-NEXT:    fldl (%esp)
 ; SSE3_32_WIN-NEXT:    fisttpll (%esp)
 ; SSE3_32_WIN-NEXT:    setbe %dl
@@ -656,7 +656,7 @@ define i64 @d_to_u64(double %a) nounwind {
 ; SSE3_32_LIN-NEXT:    subsd %xmm1, %xmm0
 ; SSE3_32_LIN-NEXT:    andnpd %xmm0, %xmm3
 ; SSE3_32_LIN-NEXT:    orpd %xmm3, %xmm2
-; SSE3_32_LIN-NEXT:    movsd %xmm2, (%esp)
+; SSE3_32_LIN-NEXT:    movlpd %xmm2, (%esp)
 ; SSE3_32_LIN-NEXT:    fldl (%esp)
 ; SSE3_32_LIN-NEXT:    fisttpll (%esp)
 ; SSE3_32_LIN-NEXT:    setbe %dl
@@ -695,7 +695,7 @@ define i64 @d_to_u64(double %a) nounwind {
 ; SSE2_32_WIN-NEXT:    andnpd %xmm2, %xmm4
 ; SSE2_32_WIN-NEXT:    andpd %xmm0, %xmm3
 ; SSE2_32_WIN-NEXT:    orpd %xmm4, %xmm3
-; SSE2_32_WIN-NEXT:    movsd %xmm3, {{[0-9]+}}(%esp)
+; SSE2_32_WIN-NEXT:    movlpd %xmm3, {{[0-9]+}}(%esp)
 ; SSE2_32_WIN-NEXT:    fldl {{[0-9]+}}(%esp)
 ; SSE2_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; SSE2_32_WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
@@ -727,7 +727,7 @@ define i64 @d_to_u64(double %a) nounwind {
 ; SSE2_32_LIN-NEXT:    andnpd %xmm2, %xmm4
 ; SSE2_32_LIN-NEXT:    andpd %xmm0, %xmm3
 ; SSE2_32_LIN-NEXT:    orpd %xmm4, %xmm3
-; SSE2_32_LIN-NEXT:    movsd %xmm3, {{[0-9]+}}(%esp)
+; SSE2_32_LIN-NEXT:    movlpd %xmm3, {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT:    fldl {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
diff --git a/test/CodeGen/X86/sqrt-fastmath-mir.ll b/test/CodeGen/X86/sqrt-fastmath-mir.ll
index 30f528cebb2..bbeeb6828ca 100644
--- a/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ b/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -19,7 +19,7 @@ define float @foo(float %f) #0 {
 ; CHECK: %12:fr32 = VMULSSrr killed %11, killed %10
 ; CHECK: %14:fr32 = FsFLD0SS
 ; CHECK: %15:fr32 = VCMPSSrr %0, killed %14, 0
-; CHECK: %17:vr128 = VANDNPSrr killed %16, killed %13
+; CHECK: %17:vr128 = VPANDNrr killed %16, killed %13
 ; CHECK: $xmm0 = COPY %18
 ; CHECK: RET 0, $xmm0
   %call = tail call float @llvm.sqrt.f32(float %f) #1