diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index e610ee28abe..8a971f6fc57 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -269,6 +269,14 @@ public: return true; } + /// Return true if it is profitable to convert a select of FP constants into + /// a constant pool load whose address depends on the select condition. The + /// parameter may be used to differentiate a select with FP compare from + /// integer compare. + virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const { + return true; + } + /// Return true if multiple condition registers are available. bool hasMultipleConditionRegisters() const { return HasMultipleConditionRegisters; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bce188e399a..db9a1048876 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18129,6 +18129,9 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { + if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint())) + return SDValue(); + // If we are before legalize types, we want the other legalization to happen // first (for example, to avoid messing with soft float). 
auto *TV = dyn_cast<ConstantFPSDNode>(N2); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c2e13e95ad5..c11598f2eb4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4767,6 +4767,14 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } +bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const { + // If we are using XMM registers in the ABI and the condition of the select is + // a floating-point compare and we have blendv or conditional move, then it is + // cheaper to select instead of doing a cross-register move and creating a + // load that depends on the compare result. + return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX(); +} + bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const { // TODO: It might be a win to ease or lift this restriction, but the generic // folds in DAGCombiner conflict with vector folds for an AVX512 target. 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index d95241237ab..fe151d89dd2 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -1041,6 +1041,8 @@ namespace llvm { bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; + bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override; + bool convertSelectOfConstantsToMath(EVT VT) const override; bool decomposeMulByConstant(EVT VT, SDValue C) const override; diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll index 89d811f8681..ecdca99bea3 100644 --- a/test/CodeGen/X86/avx512-cmp.ll +++ b/test/CodeGen/X86/avx512-cmp.ll @@ -69,10 +69,10 @@ define float @test5(float %p) #0 { ; ALL-NEXT: ## %bb.2: ## %return ; ALL-NEXT: retq ; ALL-NEXT: LBB3_1: ## %if.end -; ALL-NEXT: seta %al -; ALL-NEXT: movzbl %al, %eax -; ALL-NEXT: leaq {{.*}}(%rip), %rcx +; ALL-NEXT: vcmpltss %xmm0, %xmm1, %k1 +; ALL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ALL-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; ALL-NEXT: retq entry: %cmp = fcmp oeq float %p, 0.000000e+00 diff --git a/test/CodeGen/X86/avx512-schedule.ll b/test/CodeGen/X86/avx512-schedule.ll index 35a50acd3c8..3bfe088a426 100755 --- a/test/CodeGen/X86/avx512-schedule.ll +++ b/test/CodeGen/X86/avx512-schedule.ll @@ -1148,9 +1148,10 @@ define float @test5(float %p) #0 { ; GENERIC-NEXT: # %bb.2: # %return ; GENERIC-NEXT: retq # sched: [1:1.00] ; GENERIC-NEXT: .LBB67_1: # %if.end -; GENERIC-NEXT: seta %al # sched: [2:1.00] -; GENERIC-NEXT: movzbl %al, %eax # sched: [1:0.33] +; GENERIC-NEXT: vcmpltss %xmm0, %xmm1, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] +; GENERIC-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; 
SKX-LABEL: test5: @@ -1162,9 +1163,10 @@ define float @test5(float %p) #0 { ; SKX-NEXT: # %bb.2: # %return ; SKX-NEXT: retq # sched: [7:1.00] ; SKX-NEXT: .LBB67_1: # %if.end -; SKX-NEXT: seta %al # sched: [2:1.00] -; SKX-NEXT: movzbl %al, %eax # sched: [1:0.25] +; SKX-NEXT: vcmpltss %xmm0, %xmm1, %k1 # sched: [3:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] entry: %cmp = fcmp oeq float %p, 0.000000e+00 diff --git a/test/CodeGen/X86/select-of-fp-constants.ll b/test/CodeGen/X86/select-of-fp-constants.ll index 84b0bc13441..9ab12bc89b1 100644 --- a/test/CodeGen/X86/select-of-fp-constants.ll +++ b/test/CodeGen/X86/select-of-fp-constants.ll @@ -76,18 +76,18 @@ define float @fcmp_select_fp_constants(float %x) nounwind readnone { ; ; X64_AVX2-LABEL: fcmp_select_fp_constants: ; X64_AVX2: # %bb.0: +; X64_AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64_AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X64_AVX2-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %xmm0 -; X64_AVX2-NEXT: vmovd %xmm0, %eax -; X64_AVX2-NEXT: andl $1, %eax -; X64_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64_AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 ; X64_AVX2-NEXT: retq ; ; X64_AVX512F-LABEL: fcmp_select_fp_constants: ; X64_AVX512F: # %bb.0: -; X64_AVX512F-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %k0 -; X64_AVX512F-NEXT: kmovw %k0, %eax -; X64_AVX512F-NEXT: movzwl %ax, %eax +; X64_AVX512F-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64_AVX512F-NEXT: vcmpneqss {{.*}}(%rip), %xmm0, %k1 ; X64_AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64_AVX512F-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ; X64_AVX512F-NEXT: retq %c = fcmp une float %x, -4.0 %r = select i1 %c, float 42.0, float 23.0 diff --git a/test/CodeGen/X86/vselect-zero.ll 
b/test/CodeGen/X86/vselect-zero.ll index 722136d663c..70998b92bbb 100644 --- a/test/CodeGen/X86/vselect-zero.ll +++ b/test/CodeGen/X86/vselect-zero.ll @@ -129,9 +129,9 @@ define double @fsel_nonzero_constants(double %x, double %y) { ; AVX-LABEL: fsel_nonzero_constants: ; AVX: # %bb.0: ; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: andl $1, %eax -; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 ; AVX-NEXT: retq %cond = fcmp oeq double %x, %y %r = select i1 %cond, double 12.0, double 42.0