diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 088119e4924..351a22c46fe 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -31905,23 +31905,24 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
 
 static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
-  EVT VT = N->getValueType(0);
-  if (VT.is512BitVector() && !Subtarget.hasDQI()) {
-    // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extension.
-    // These logic operations may be executed in the integer domain.
+  MVT VT = N->getSimpleValueType(0);
+  // If we have integer vector types available, use the integer opcodes.
+  if (VT.isVector() && Subtarget.hasSSE2()) {
     SDLoc dl(N);
 
-    SDValue Op0 = DAG.getBitcast(MVT::v8i64, N->getOperand(0));
-    SDValue Op1 = DAG.getBitcast(MVT::v8i64, N->getOperand(1));
-    unsigned IntOpcode = 0;
+    MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
+
+    SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
+    SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
+    unsigned IntOpcode;
     switch (N->getOpcode()) {
-      default: llvm_unreachable("Unexpected FP logic op");
-      case X86ISD::FOR: IntOpcode = ISD::OR; break;
-      case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
-      case X86ISD::FAND: IntOpcode = ISD::AND; break;
-      case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
+    default: llvm_unreachable("Unexpected FP logic op");
+    case X86ISD::FOR: IntOpcode = ISD::OR; break;
+    case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+    case X86ISD::FAND: IntOpcode = ISD::AND; break;
+    case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
     }
-    SDValue IntOp = DAG.getNode(IntOpcode, dl, MVT::v8i64, Op0, Op1);
+    SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
     return DAG.getBitcast(VT, IntOp);
   }
   return SDValue();
diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll
index 25892de7ed0..d399e808f7f 100644
--- a/test/CodeGen/X86/avx512-arith.ll
+++ b/test/CodeGen/X86/avx512-arith.ll
@@ -1011,8 +1011,7 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) {
 ;
 ; AVX512VL-LABEL: test_fxor_8f32:
 ; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
-; AVX512VL-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpxord {{.*}}(%rip){1to8}, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: test_fxor_8f32:
diff --git a/test/CodeGen/X86/pr13577.ll b/test/CodeGen/X86/pr13577.ll
index a89a503fd49..1b1622513ea 100644
--- a/test/CodeGen/X86/pr13577.ll
+++ b/test/CodeGen/X86/pr13577.ll
@@ -30,10 +30,9 @@ declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone
 define float @pr26070() {
 ; CHECK-LABEL: pr26070:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm1
 ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; CHECK-NEXT: orps %xmm1, %xmm0
+; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
 ;
 %c = call float @copysignf(float 1.0, float undef) readnone
diff --git a/test/CodeGen/X86/vec-copysign-avx512.ll b/test/CodeGen/X86/vec-copysign-avx512.ll
index 53c89a872c8..73349a3c117 100644
--- a/test/CodeGen/X86/vec-copysign-avx512.ll
+++ b/test/CodeGen/X86/vec-copysign-avx512.ll
@@ -5,11 +5,9 @@
 define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind {
 ; AVX512VL-LABEL: v4f32:
 ; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; AVX512VL-NEXT: vandps %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; AVX512VL-NEXT: vandps %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT: vporq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512VLDQ-LABEL: v4f32:
@@ -25,11 +23,9 @@ define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind {
 define <8 x float> @v8f32(<8 x float> %a, <8 x float> %b) nounwind {
 ; AVX512VL-LABEL: v8f32:
 ; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX512VL-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm1, %ymm1
+; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; AVX512VL-NEXT: vporq %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512VLDQ-LABEL: v8f32:
@@ -61,12 +57,19 @@ define <16 x float> @v16f32(<16 x float> %a, <16 x float> %b) nounwind {
 }
 
 define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind {
-; CHECK-LABEL: v2f64:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; AVX512VL-LABEL: v2f64:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vporq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512VLDQ-LABEL: v2f64:
+; AVX512VLDQ: ## BB#0:
+; AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLDQ-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX512VLDQ-NEXT: retq
 %tmp = tail call <2 x double> @llvm.copysign.v2f64( <2 x double> %a, <2 x double> %b )
 ret <2 x double> %tmp
 }
@@ -74,11 +77,9 @@ define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind {
 define <4 x double> @v4f64(<4 x double> %a, <4 x double> %b) nounwind {
 ; AVX512VL-LABEL: v4f64:
 ; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX512VL-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm1, %ymm1
+; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT: vporq %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512VLDQ-LABEL: v4f64:
diff --git a/test/CodeGen/X86/vec_fabs.ll b/test/CodeGen/X86/vec_fabs.ll
index 15941f7a8bc..e61f01ec0c6 100644
--- a/test/CodeGen/X86/vec_fabs.ll
+++ b/test/CodeGen/X86/vec_fabs.ll
@@ -10,15 +10,35 @@
 ; 2013.
 
 define <2 x double> @fabs_v2f64(<2 x double> %p) {
-; X32-LABEL: fabs_v2f64:
-; X32: # BB#0:
-; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: retl
+; X32_AVX-LABEL: fabs_v2f64:
+; X32_AVX: # BB#0:
+; X32_AVX-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX-NEXT: retl
 ;
-; X64-LABEL: fabs_v2f64:
-; X64: # BB#0:
-; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: retq
+; X32_AVX512VL-LABEL: fabs_v2f64:
+; X32_AVX512VL: # BB#0:
+; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX512VL-NEXT: retl
+;
+; X32_AVX512VLDQ-LABEL: fabs_v2f64:
+; X32_AVX512VLDQ: # BB#0:
+; X32_AVX512VLDQ-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX512VLDQ-NEXT: retl
+;
+; X64_AVX-LABEL: fabs_v2f64:
+; X64_AVX: # BB#0:
+; X64_AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX-NEXT: retq
+;
+; X64_AVX512VL-LABEL: fabs_v2f64:
+; X64_AVX512VL: # BB#0:
+; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX512VL-NEXT: retq
+;
+; X64_AVX512VLDQ-LABEL: fabs_v2f64:
+; X64_AVX512VLDQ: # BB#0:
+; X64_AVX512VLDQ-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX512VLDQ-NEXT: retq
 %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
 ret <2 x double> %t
 }
@@ -32,8 +52,7 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) {
 ;
 ; X32_AVX512VL-LABEL: fabs_v4f32:
 ; X32_AVX512VL: # BB#0:
-; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1
-; X32_AVX512VL-NEXT: vandps %xmm1, %xmm0, %xmm0
+; X32_AVX512VL-NEXT: vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
 ; X32_AVX512VL-NEXT: retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v4f32:
@@ -48,8 +67,7 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) {
 ;
 ; X64_AVX512VL-LABEL: fabs_v4f32:
 ; X64_AVX512VL: # BB#0:
-; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; X64_AVX512VL-NEXT: vandps %xmm1, %xmm0, %xmm0
+; X64_AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
 ; X64_AVX512VL-NEXT: retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v4f32:
@@ -69,8 +87,7 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) {
 ;
 ; X32_AVX512VL-LABEL: fabs_v4f64:
 ; X32_AVX512VL: # BB#0:
-; X32_AVX512VL-NEXT: vbroadcastsd {{\.LCPI.*}}, %ymm1
-; X32_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0
+; X32_AVX512VL-NEXT: vpandq {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
 ; X32_AVX512VL-NEXT: retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v4f64:
@@ -85,8 +102,7 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) {
 ;
 ; X64_AVX512VL-LABEL: fabs_v4f64:
 ; X64_AVX512VL: # BB#0:
-; X64_AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
-; X64_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0
+; X64_AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
 ; X64_AVX512VL-NEXT: retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v4f64:
@@ -106,8 +122,7 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) {
 ;
 ; X32_AVX512VL-LABEL: fabs_v8f32:
 ; X32_AVX512VL: # BB#0:
-; X32_AVX512VL-NEXT: vbroadcastss {{\.LCPI.*}}, %ymm1
-; X32_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0
+; X32_AVX512VL-NEXT: vpandd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
 ; X32_AVX512VL-NEXT: retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v8f32:
@@ -122,8 +137,7 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) {
 ;
 ; X64_AVX512VL-LABEL: fabs_v8f32:
 ; X64_AVX512VL: # BB#0:
-; X64_AVX512VL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
-; X64_AVX512VL-NEXT: vandps %ymm1, %ymm0, %ymm0
+; X64_AVX512VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
 ; X64_AVX512VL-NEXT: retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v8f32: