From 017d4a331bf53073ba48dd08ac2fb5dde1887f91 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 6 Jun 2018 10:52:10 +0000 Subject: [PATCH] [X86][BMI][TBM] Only demand bottom 16-bits of the BEXTR control op (PR34042) Only the bottom 16-bits of BEXTR's control op are required (7:0 INDEX, 15:8 LENGTH). Differential Revision: https://reviews.llvm.org/D47690 llvm-svn: 334083 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 8 +-- lib/Target/X86/X86ISelLowering.cpp | 34 +++++++++++ lib/Target/X86/X86InstrCompiler.td | 11 ---- lib/Target/X86/X86InstrInfo.td | 72 ++++++++++++++--------- lib/Target/X86/X86IntrinsicsInfo.h | 4 ++ test/CodeGen/X86/bmi-x86_64.ll | 4 +- test/CodeGen/X86/bmi.ll | 3 +- test/CodeGen/X86/tbm-intrinsics-x86_64.ll | 10 ++++ 8 files changed, 100 insertions(+), 46 deletions(-) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 392583dea8a..c7e91678374 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1780,10 +1780,10 @@ bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { } // In static codegen with small code model, we can get the address of a label - // into a register with 'movl'. TableGen has already made sure we're looking - // at a label of some kind. - assert(N->getOpcode() == X86ISD::Wrapper && - "Unexpected node type for MOV32ri64"); + // into a register with 'movl' + if (N->getOpcode() != X86ISD::Wrapper) + return false; + N = N.getOperand(0); // At least GNU as does not accept 'movl' for TPOFF relocations. 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8d5a5e53dfd..2e454fafafd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -36834,6 +36834,39 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + EVT VT = N->getValueType(0); + unsigned NumBits = VT.getSizeInBits(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + + // TODO - Constant Folding. + if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) { + // Reduce Cst1 to the bottom 16-bits. + // NOTE: SimplifyDemandedBits won't do this for constants. + const APInt &Val1 = Cst1->getAPIntValue(); + APInt MaskedVal1 = Val1 & 0xFFFF; + if (MaskedVal1 != Val1) + return DAG.getNode(X86ISD::BEXTR, SDLoc(N), VT, Op0, + DAG.getConstant(MaskedVal1, SDLoc(N), VT)); + } + + // Only bottom 16-bits of the control bits are required. 
+ KnownBits Known; + APInt DemandedMask(APInt::getLowBitsSet(NumBits, 16)); + if (TLI.SimplifyDemandedBits(Op1, DemandedMask, Known, TLO)) { + DCI.CommitTargetLoweringOpt(TLO); + return SDValue(N, 0); + } + + return SDValue(); +} static bool isNullFPScalarOrVectorConst(SDValue V) { return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode()); @@ -39220,6 +39253,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); case ISD::OR: return combineOr(N, DAG, DCI, Subtarget); case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget); + case X86ISD::BEXTR: return combineBEXTR(N, DAG, DCI, Subtarget); case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget); case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget); case ISD::STORE: return combineStore(N, DAG, Subtarget); diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index e455349e0d8..c863bac9722 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -2040,14 +2040,3 @@ let Predicates = [HasBMI, NoTBM] in { (MOV32ri64 mov64imm32:$src2), sub_32bit))>; } // HasBMI, NoTBM - -let Predicates = [HasTBM] in { - def : Pat<(X86bextr GR32:$src1, (i32 imm:$src2)), - (BEXTRI32ri GR32:$src1, imm:$src2)>; - def : Pat<(X86bextr (loadi32 addr:$src1), (i32 imm:$src2)), - (BEXTRI32mi addr:$src1, imm:$src2)>; - def : Pat<(X86bextr GR64:$src1, i64immSExt32:$src2), - (BEXTRI64ri GR64:$src1, i64immSExt32:$src2)>; - def : Pat<(X86bextr (loadi64 addr:$src1), i64immSExt32:$src2), - (BEXTRI64mi addr:$src1, i64immSExt32:$src2)>; -} diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 8a03b48e34b..4da5af69b77 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -2357,9 +2357,35 @@ let Predicates = [HasBMI] in { (BLSI64rr GR64:$src)>; } -multiclass bmi_bextr_bzhi opc, string mnemonic, RegisterClass RC, - X86MemOperand x86memop, 
Intrinsic Int, - PatFrag ld_frag, X86FoldableSchedWrite Sched> { +multiclass bmi_bextr opc, string mnemonic, RegisterClass RC, + X86MemOperand x86memop, SDNode OpNode, + PatFrag ld_frag, X86FoldableSchedWrite Sched> { + def rr : I, + T8PS, VEX, Sched<[Sched]>; + def rm : I, T8PS, VEX, + Sched<[Sched.Folded, + // x86memop:$src1 + ReadDefault, ReadDefault, ReadDefault, ReadDefault, + ReadDefault, + // RC:$src2 + ReadAfterLd]>; +} + +let Predicates = [HasBMI], Defs = [EFLAGS] in { + defm BEXTR32 : bmi_bextr<0xF7, "bextr{l}", GR32, i32mem, + X86bextr, loadi32, WriteBEXTR>; + defm BEXTR64 : bmi_bextr<0xF7, "bextr{q}", GR64, i64mem, + X86bextr, loadi64, WriteBEXTR>, VEX_W; +} + +multiclass bmi_bzhi opc, string mnemonic, RegisterClass RC, + X86MemOperand x86memop, Intrinsic Int, + PatFrag ld_frag, X86FoldableSchedWrite Sched> { def rr : I, @@ -2376,18 +2402,11 @@ multiclass bmi_bextr_bzhi opc, string mnemonic, RegisterClass RC, ReadAfterLd]>; } -let Predicates = [HasBMI], Defs = [EFLAGS] in { - defm BEXTR32 : bmi_bextr_bzhi<0xF7, "bextr{l}", GR32, i32mem, - int_x86_bmi_bextr_32, loadi32, WriteBEXTR>; - defm BEXTR64 : bmi_bextr_bzhi<0xF7, "bextr{q}", GR64, i64mem, - int_x86_bmi_bextr_64, loadi64, WriteBEXTR>, VEX_W; -} - let Predicates = [HasBMI2], Defs = [EFLAGS] in { - defm BZHI32 : bmi_bextr_bzhi<0xF5, "bzhi{l}", GR32, i32mem, - int_x86_bmi_bzhi_32, loadi32, WriteBZHI>; - defm BZHI64 : bmi_bextr_bzhi<0xF5, "bzhi{q}", GR64, i64mem, - int_x86_bmi_bzhi_64, loadi64, WriteBZHI>, VEX_W; + defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem, + int_x86_bmi_bzhi_32, loadi32, WriteBZHI>; + defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem, + int_x86_bmi_bzhi_64, loadi64, WriteBZHI>, VEX_W; } def CountTrailingOnes : SDNodeXForm opc, RegisterClass RC, string OpcodeStr, - X86MemOperand x86memop, PatFrag ld_frag, - Intrinsic Int, Operand immtype, - SDPatternOperator immoperator, - X86FoldableSchedWrite Sched> { +multiclass tbm_ternary_imm opc, RegisterClass RC, string OpcodeStr, 
+ X86MemOperand x86memop, PatFrag ld_frag, + SDNode OpNode, Operand immtype, + SDPatternOperator immoperator, + X86FoldableSchedWrite Sched> { def ri : Ii32, + [(set RC:$dst, (OpNode RC:$src1, immoperator:$cntl))]>, XOP, XOPA, Sched<[Sched]>; def mi : Ii32, + [(set RC:$dst, (OpNode (ld_frag addr:$src1), immoperator:$cntl))]>, XOP, XOPA, Sched<[Sched.Folded]>; } -defm BEXTRI32 : tbm_ternary_imm_intr<0x10, GR32, "bextr{l}", i32mem, loadi32, - int_x86_tbm_bextri_u32, i32imm, imm, - WriteBEXTR>; +defm BEXTRI32 : tbm_ternary_imm<0x10, GR32, "bextr{l}", i32mem, loadi32, + X86bextr, i32imm, imm, WriteBEXTR>; let ImmT = Imm32S in -defm BEXTRI64 : tbm_ternary_imm_intr<0x10, GR64, "bextr{q}", i64mem, loadi64, - int_x86_tbm_bextri_u64, i64i32imm, - i64immSExt32, WriteBEXTR>, VEX_W; +defm BEXTRI64 : tbm_ternary_imm<0x10, GR64, "bextr{q}", i64mem, loadi64, + X86bextr, i64i32imm, + i64immSExt32, WriteBEXTR>, VEX_W; multiclass tbm_binary_rm opc, Format FormReg, Format FormMem, RegisterClass RC, string OpcodeStr, diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 219366da299..9e3810b10ca 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -1345,6 +1345,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_128 , IFMA_OP, X86ISD::VPMADD52L, 0), X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_256 , IFMA_OP, X86ISD::VPMADD52L, 0), X86_INTRINSIC_DATA(avx512_vpmadd52l_uq_512 , IFMA_OP, X86ISD::VPMADD52L, 0), + X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0), + X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0), X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, ISD::FMA, 0), X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, ISD::FMA, 0), X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, ISD::FMA, 0), @@ -1456,6 +1458,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, 
X86ISD::VPMADDUBSW, 0), X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0), X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), + X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0), + X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0), X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0), X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0), diff --git a/test/CodeGen/X86/bmi-x86_64.ll b/test/CodeGen/X86/bmi-x86_64.ll index 5a733ca3cf0..970dafdde4b 100644 --- a/test/CodeGen/X86/bmi-x86_64.ll +++ b/test/CodeGen/X86/bmi-x86_64.ll @@ -52,8 +52,8 @@ define i64 @bextr64b_load(i64* %x) { define i64 @bextr64c(i64 %x, i32 %y) { ; CHECK-LABEL: bextr64c: ; CHECK: # %bb.0: -; CHECK-NEXT: movslq %esi, %rax -; CHECK-NEXT: bextrq %rax, %rdi, %rax +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: bextrq %rsi, %rdi, %rax ; CHECK-NEXT: retq %tmp0 = sext i32 %y to i64 %tmp1 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %tmp0) diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll index 40e4c66959b..5ec2da420c4 100644 --- a/test/CodeGen/X86/bmi.ll +++ b/test/CodeGen/X86/bmi.ll @@ -404,8 +404,7 @@ define i32 @bextr32c(i32 %x, i16 zeroext %y) { ; ; X64-LABEL: bextr32c: ; X64: # %bb.0: -; X64-NEXT: movswl %si, %eax -; X64-NEXT: bextrl %eax, %edi, %eax +; X64-NEXT: bextrl %esi, %edi, %eax ; X64-NEXT: retq %tmp0 = sext i16 %y to i32 %tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0) diff --git a/test/CodeGen/X86/tbm-intrinsics-x86_64.ll b/test/CodeGen/X86/tbm-intrinsics-x86_64.ll index 0f4b7ce6c93..98ee8f07c00 100644 --- a/test/CodeGen/X86/tbm-intrinsics-x86_64.ll +++ b/test/CodeGen/X86/tbm-intrinsics-x86_64.ll @@ -24,6 +24,16 @@ entry: ret i64 %0 } +define i64 @test_x86_tbm_bextri_u64_bigint(i64 %a) nounwind readnone { +; CHECK-LABEL: 
test_x86_tbm_bextri_u64_bigint: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bextrq $65535, %rdi, %rax # imm = 0xFFFF +; CHECK-NEXT: retq +entry: + %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a, i64 549755813887) + ret i64 %0 +} + define i64 @test_x86_tbm_bextri_u64_z(i64 %a, i64 %b) nounwind readnone { ; CHECK-LABEL: test_x86_tbm_bextri_u64_z: ; CHECK: # %bb.0: # %entry