From 6eb24fd744f4a1f2921f97b978b5bdfda216e287 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Tue, 10 Aug 2010 23:25:42 +0000 Subject: [PATCH] Add AVX matching patterns to Packed Bit Test intrinsics. Apply the same approach of SSE4.1 ptest intrinsics but create a new x86 node "testp" since AVX introduces vtest{ps}{pd} instructions which set ZF and CF depending on sign bit AND and ANDN of packed floating-point sources. This is slightly different from what the "ptest" does. Tests comming with the other 256 intrinsics tests. llvm-svn: 110744 --- lib/Target/X86/X86ISelLowering.cpp | 46 +++++++++++++++++--- lib/Target/X86/X86ISelLowering.h | 3 ++ lib/Target/X86/X86InstrFragmentsSIMD.td | 5 ++- lib/Target/X86/X86InstrSSE.td | 57 ++++++++++++++----------- 4 files changed, 78 insertions(+), 33 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 12453e3bc04..d8e25533a59 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6987,24 +6987,58 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(X86CC, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } - // ptest intrinsics. The intrinsic these come from are designed to return - // an integer value, not just an instruction so lower it to the ptest - // pattern and a setcc for the result. + // ptest and testp intrinsics. The intrinsic these come from are designed to + // return an integer value, not just an instruction so lower it to the ptest + // or testp pattern and a setcc for the result. case Intrinsic::x86_sse41_ptestz: case Intrinsic::x86_sse41_ptestc: - case Intrinsic::x86_sse41_ptestnzc:{ + case Intrinsic::x86_sse41_ptestnzc: + case Intrinsic::x86_avx_ptestz_256: + case Intrinsic::x86_avx_ptestc_256: + case Intrinsic::x86_avx_ptestnzc_256: + case Intrinsic::x86_avx_vtestz_ps: + case Intrinsic::x86_avx_vtestc_ps: + case Intrinsic::x86_avx_vtestnzc_ps: + case Intrinsic::x86_avx_vtestz_pd: + case Intrinsic::x86_avx_vtestc_pd: + case Intrinsic::x86_avx_vtestnzc_pd: + case Intrinsic::x86_avx_vtestz_ps_256: + case Intrinsic::x86_avx_vtestc_ps_256: + case Intrinsic::x86_avx_vtestnzc_ps_256: + case Intrinsic::x86_avx_vtestz_pd_256: + case Intrinsic::x86_avx_vtestc_pd_256: + case Intrinsic::x86_avx_vtestnzc_pd_256: { + bool IsTestPacked = false; unsigned X86CC = 0; switch (IntNo) { default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); + case Intrinsic::x86_avx_vtestz_ps: + case Intrinsic::x86_avx_vtestz_pd: + case Intrinsic::x86_avx_vtestz_ps_256: + case Intrinsic::x86_avx_vtestz_pd_256: + IsTestPacked = true; // Fallthrough case Intrinsic::x86_sse41_ptestz: + case Intrinsic::x86_avx_ptestz_256: // ZF = 1 X86CC = X86::COND_E; break; + case Intrinsic::x86_avx_vtestc_ps: + case Intrinsic::x86_avx_vtestc_pd: + case Intrinsic::x86_avx_vtestc_ps_256: + case Intrinsic::x86_avx_vtestc_pd_256: + IsTestPacked = true; // Fallthrough case Intrinsic::x86_sse41_ptestc: + case Intrinsic::x86_avx_ptestc_256: // CF = 1 X86CC = X86::COND_B; break; + case Intrinsic::x86_avx_vtestnzc_ps: + case Intrinsic::x86_avx_vtestnzc_pd: + case Intrinsic::x86_avx_vtestnzc_ps_256: + case Intrinsic::x86_avx_vtestnzc_pd_256: + IsTestPacked = true; // Fallthrough case Intrinsic::x86_sse41_ptestnzc: + case Intrinsic::x86_avx_ptestnzc_256: // ZF and CF = 0 X86CC = X86::COND_A; break; @@ -7012,7 +7046,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); - SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS); + unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST; + SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS); SDValue CC = DAG.getConstant(X86CC, MVT::i8); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); @@ -8033,6 +8068,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::AND: return "X86ISD::AND"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; + case X86ISD::TESTP: return "X86ISD::TESTP"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA"; } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 3556579ae65..61e304dad19 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -248,6 +248,9 @@ namespace llvm { // PTEST - Vector bitwise comparisons PTEST, + // TESTP - Vector packed fp sign bitwise comparisons + TESTP, + // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, // according to %al. An operator is needed so that this can be expanded // with control flow. diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index a00b0803a3e..f5d28eda6d7 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -117,9 +117,10 @@ def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, - SDTCisVT<1, v4f32>, - SDTCisVT<2, v4f32>]>; + SDTCisVec<1>, + SDTCisSameAs<2, 1>]>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; +def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; //===----------------------------------------------------------------------===// // SSE Complex Patterns diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2768133fd5f..e24e5530c5f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4646,47 +4646,52 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", // the intel intrinsic that corresponds to this. let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, - OpSize, VEX; -def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), - "vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX; + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, + OpSize, VEX; +def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, + OpSize, VEX; -def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), - "vptest\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, - OpSize, VEX; +def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>, + OpSize, VEX; def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), - "vptest\t{$src2, $src1|$src1, $src2}", []>, OpSize, VEX; + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>, + OpSize, VEX; } let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, + "ptest \t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, OpSize; -def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, +def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), + "ptest \t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, OpSize; } // The bit test instructions below are AVX only multiclass avx_bittest opc, string OpcodeStr, RegisterClass RC, - X86MemOperand x86memop> { - def rr : SS48I, OpSize, VEX; - def rm : SS48I, OpSize, VEX; + X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> { + def rr : SS48I, OpSize, VEX; + def rm : SS48I, + OpSize, VEX; } let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { - defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem>; - defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem>; - defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem>; - defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem>; +defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>; +defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>; +defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>; +defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>; } //===----------------------------------------------------------------------===//