diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 54bc9b81946..0acbe9da46f 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1099,7 +1099,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PINSRDrr, X86::PINSRDrm, 0 }, { X86::PINSRQrr, X86::PINSRQrm, 0 }, { X86::PINSRWrri, X86::PINSRWrmi, 0 }, - { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 }, + { X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 }, { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 }, { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, @@ -1397,7 +1397,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPINSRDrr, X86::VPINSRDrm, 0 }, { X86::VPINSRQrr, X86::VPINSRQrm, 0 }, { X86::VPINSRWrri, X86::VPINSRWrmi, 0 }, - { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, 0 }, + { X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 }, { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 }, { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 }, { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 }, @@ -1557,7 +1557,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 }, { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 }, { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 }, - { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, 0 }, + { X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 }, { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 }, { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 }, { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 }, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 7ced35a56a8..2a2539e473c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3970,47 +3970,6 @@ def SSE_PMADD : OpndItins< let ExeDomain = SSEPackedInt in { // SSE integer instructions -multiclass PDI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, - RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, - OpndItins itins, - bit IsCommutable = 0, - bit Is2Addr = 1> { - let isCommutable = IsCommutable in - 
def rr : PDI, - Sched<[itins.Sched]>; - def rm : PDI, Sched<[itins.Sched.Folded, ReadAfterLd]>; -} - -multiclass PDI_binop_all_int opc, string OpcodeStr, Intrinsic IntId128, - Intrinsic IntId256, OpndItins itins, - bit IsCommutable = 0> { -let Predicates = [HasAVX] in - defm V#NAME : PDI_binop_rm_int, VEX_4V; - -let Constraints = "$src1 = $dst" in - defm NAME : PDI_binop_rm_int; - -let Predicates = [HasAVX2] in - defm V#NAME#Y : PDI_binop_rm_int, VEX_4V, VEX_L; -} - /// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types multiclass PDI_binop_rm2 opc, string OpcodeStr, SDNode OpNode, ValueType DstVT, ValueType SrcVT, RegisterClass RC, @@ -4086,9 +4045,17 @@ defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8, defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16, SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; -// Intrinsic forms -defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, - int_x86_avx2_pmadd_wd, SSE_PMADD, 1>; +let Predicates = [HasAVX, NoVLX_Or_NoBWI] in +defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, + loadv2i64, i128mem, SSE_PMADD, 0>, VEX_4V; + +let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in +defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16, + VR256, loadv4i64, i256mem, SSE_PMADD, + 0>, VEX_4V, VEX_L; +let Constraints = "$src1 = $dst" in +defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, + memopv2i64, i128mem, SSE_PMADD>; let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128, @@ -5529,16 +5496,16 @@ def SSE_PMULHRSW : OpndItins< /// SS3I_binop_rm - Simple SSSE3 bin op multiclass SS3I_binop_rm opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop, OpndItins itins, - bit Is2Addr = 1> { + ValueType DstVT, ValueType OpVT, RegisterClass RC, + PatFrag memop_frag, X86MemOperand 
x86memop, + OpndItins itins, bit Is2Addr = 1> { let isCommutable = 1 in def rr : SS38I, + [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))], itins.rr>, Sched<[itins.Sched]>; def rm : SS38I opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, - (OpVT (OpNode RC:$src1, + (DstVT (OpNode (OpVT RC:$src1), (bitconvert (memop_frag addr:$src2)))))], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } @@ -5593,27 +5560,30 @@ multiclass SS3I_binop_rm_int_y opc, string OpcodeStr, let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in { let isCommutable = 0 in { - defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128, - loadv2i64, i128mem, + defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8, + VR128, loadv2i64, i128mem, SSE_PSHUFB, 0>, VEX_4V; + defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16, + v16i8, VR128, loadv2i64, i128mem, + SSE_PMADD, 0>, VEX_4V; } -defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, VR128, - loadv2i64, i128mem, +defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16, + VR128, loadv2i64, i128mem, SSE_PMULHRSW, 0>, VEX_4V; } let ImmT = NoImm, Predicates = [HasAVX] in { let isCommutable = 0 in { - defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128, + defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128, loadv2i64, i128mem, SSE_PHADDSUBW, 0>, VEX_4V; - defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128, + defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128, loadv2i64, i128mem, SSE_PHADDSUBD, 0>, VEX_4V; - defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128, + defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128, loadv2i64, i128mem, SSE_PHADDSUBW, 0>, VEX_4V; - defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", 
X86hsub, v4i32, VR128, + defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128, loadv2i64, i128mem, SSE_PHADDSUBD, 0>, VEX_4V; defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", @@ -5631,35 +5601,35 @@ let isCommutable = 0 in { defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", int_x86_ssse3_phsub_sw_128, SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V; - defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", - int_x86_ssse3_pmadd_ub_sw_128, - SSE_PMADD, loadv2i64, 0>, VEX_4V; } } let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { let isCommutable = 0 in { - defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256, - loadv4i64, i256mem, + defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8, + VR256, loadv4i64, i256mem, SSE_PSHUFB, 0>, VEX_4V, VEX_L; + defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16, + v32i8, VR256, loadv4i64, i256mem, + SSE_PMADD, 0>, VEX_4V, VEX_L; } -defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, VR256, - loadv4i64, i256mem, +defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16, + VR256, loadv4i64, i256mem, SSE_PMULHRSW, 0>, VEX_4V, VEX_L; } let ImmT = NoImm, Predicates = [HasAVX2] in { let isCommutable = 0 in { - defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256, + defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16, + VR256, loadv4i64, i256mem, + SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; + defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256, loadv4i64, i256mem, SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; - defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256, - loadv4i64, i256mem, + defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16, + VR256, loadv4i64, i256mem, SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; - defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256, - loadv4i64, i256mem, - SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; - defm 
VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256, + defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256, loadv4i64, i256mem, SSE_PHADDSUBW, 0>, VEX_4V, VEX_L; defm VPSIGNBY : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b, @@ -5674,22 +5644,19 @@ let isCommutable = 0 in { defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", int_x86_avx2_phsub_sw, WriteVecALU>, VEX_4V, VEX_L; - defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", - int_x86_avx2_pmadd_ub_sw, - WriteVecIMul>, VEX_4V, VEX_L; } } // None of these have i8 immediate fields. let ImmT = NoImm, Constraints = "$src1 = $dst" in { let isCommutable = 0 in { - defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128, + defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128, memopv2i64, i128mem, SSE_PHADDSUBW>; - defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128, + defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128, memopv2i64, i128mem, SSE_PHADDSUBD>; - defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128, + defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128, memopv2i64, i128mem, SSE_PHADDSUBW>; - defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128, + defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128, memopv2i64, i128mem, SSE_PHADDSUBD>; defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128, SSE_PSIGN, memopv2i64>; @@ -5697,7 +5664,7 @@ let isCommutable = 0 in { SSE_PSIGN, memopv2i64>; defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128, SSE_PSIGN, memopv2i64>; - defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128, + defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128, memopv2i64, i128mem, SSE_PSHUFB>; defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", int_x86_ssse3_phadd_sw_128, @@ -5705,12 +5672,12 @@ let isCommutable = 0 in { defm PHSUBSW : 
SS3I_binop_rm_int<0x07, "phsubsw", int_x86_ssse3_phsub_sw_128, SSE_PHADDSUBSW, memopv2i64>; - defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", - int_x86_ssse3_pmadd_ub_sw_128, - SSE_PMADD, memopv2i64>; + defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16, + v16i8, VR128, memopv2i64, i128mem, + SSE_PMADD>; } -defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, VR128, - memopv2i64, i128mem, SSE_PMULHRSW>; +defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16, + VR128, memopv2i64, i128mem, SSE_PMULHRSW>; } //===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 6eda95e1a4f..a2fffe89df6 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -289,6 +289,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0), X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0), + X86_INTRINSIC_DATA(avx2_pmadd_ub_sw, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0), + X86_INTRINSIC_DATA(avx2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0), X86_INTRINSIC_DATA(avx2_pmovmskb, INTR_TYPE_1OP, X86ISD::MOVMSK, 0), X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0), X86_INTRINSIC_DATA(avx2_pmul_hr_sw, INTR_TYPE_2OP, X86ISD::MULHRS, 0), @@ -1760,6 +1762,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0), X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0), X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0), + X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0), X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0), X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0), 
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0), @@ -1808,6 +1811,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0), X86_INTRINSIC_DATA(ssse3_phsub_w_128, INTR_TYPE_2OP, X86ISD::HSUB, 0), + X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0), X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0), X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),