diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 702ce9ee8d6..5e8ab9e02c5 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -481,11 +481,10 @@ class N3VD op21_20, bits<4> op11_8, bit op4, let isCommutable = Commutable; } class N3VDSL op21_20, bits<4> op11_8, - string OpcodeStr, ValueType Ty, SDNode ShOp> + InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_VFP2:$src2), @@ -496,7 +495,7 @@ class N3VDSL16 op21_20, bits<4> op11_8, string OpcodeStr, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NoItinerary, + IIC_VMULi16D, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), @@ -515,11 +514,11 @@ class N3VQ op21_20, bits<4> op11_8, bit op4, let isCommutable = Commutable; } class N3VQSL op21_20, bits<4> op11_8, - string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp> + InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), @@ -530,7 +529,7 @@ class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NoItinerary, + IIC_VMULi16Q, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), @@ -544,7 +543,7 @@ class N3VDs op21_20, bits<4> op11_8, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { let isCommutable = Commutable; } @@ -557,32 +556,30 @@ class N3VDsPat // Basic 3-register intrinsics, both double- and quad-register. 
class N3VDInt op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } -class N3VDIntSL op21_20, bits<4> op11_8, +class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_VFP2:$src2), imm:$lane)))))]> { let isCommutable = 0; } -class N3VDIntSL16 op21_20, bits<4> op11_8, +class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_8:$src2), @@ -591,32 +588,30 @@ class N3VDIntSL16 op21_20, bits<4> op11_8, } class N3VQInt op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } -class N3VQIntSL op21_20, bits<4> op11_8, +class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), imm:$lane)))))]> { let isCommutable = 0; } -class N3VQIntSL16 op21_20, bits<4> op11_8, +class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -626,30 +621,29 @@ class N3VQIntSL16 op21_20, bits<4> op11_8, // Multiply-Add/Sub operations, both double- and quad-register. 
class N3VDMulOp op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode> + InstrItinClass itin, string OpcodeStr, + ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V; -class N3VDMulOpSL op21_20, bits<4> op11_8, +class N3VDMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), - NoItinerary, + (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (MulOp DPR:$src2, (Ty (NEONvduplane (Ty DPR_VFP2:$src3), imm:$lane)))))))]>; -class N3VDMulOpSL16 op21_20, bits<4> op11_8, +class N3VDMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), - NoItinerary, + (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), @@ -658,32 +652,31 @@ class N3VDMulOpSL16 op21_20, bits<4> op11_8, imm:$lane)))))))]>; class N3VQMulOp op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode> + InstrItinClass itin, string OpcodeStr, ValueType Ty, + SDNode MulOp, SDNode OpNode> : N3V; -class N3VQMulOpSL op21_20, bits<4> op11_8, +class N3VQMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), - NoItinerary, + (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$src2, (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3), imm:$lane)))))))]>; -class N3VQMulOpSL16 op21_20, bits<4> op11_8, +class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), - NoItinerary, + (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), @@ -693,10 +686,11 @@ class N3VQMulOpSL16 op21_20, bits<4> op11_8, // Multiply-Add/Sub operations, scalar single-precision class N3VDMulOps op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode> + InstrItinClass itin, string OpcodeStr, + ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V; class N3VDMulOpsPat @@ -710,18 +704,18 @@ class N3VDMulOpsPat // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. 
class N3VDInt3 op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> + InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; class N3VQInt3 op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> + InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; @@ -729,31 +723,30 @@ class N3VQInt3 op21_20, bits<4> op11_8, bit op4, // Neon Long 3-argument intrinsic. The destination register is // a quad-register and is also used as the first source operand register. class N3VLInt3 op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp> + InstrItinClass itin, string OpcodeStr, + ValueType TyQ, ValueType TyD, Intrinsic IntOp> : N3V; -class N3VLInt3SL op21_20, bits<4> op11_8, +class N3VLInt3SL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; -class N3VLInt3SL16 op21_20, bits<4> op11_8, +class N3VLInt3SL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V op21_20, bits<4> op11_8, bit op4, string OpcodeStr, ValueType TyD, ValueType TyQ, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; @@ -775,31 +768,29 @@ class N3VNInt op21_20, bits<4> op11_8, bit op4, // Long 3-register intrinsics. class N3VLInt op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType TyQ, ValueType TyD, + InstrItinClass itin, string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } -class N3VLIntSL op21_20, bits<4> op11_8, +class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; -class N3VLIntSL16 op21_20, bits<4> op11_8, +class N3VLIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V op21_20, bits<4> op11_8, bit op4, string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; @@ -821,13 +812,13 @@ class N2VDPLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; class N2VQPLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; // Pairwise long 2-register accumulate intrinsics, @@ -837,29 +828,31 @@ class N2VDPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; class N2VQPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; // Shift by immediate, // both double- and quad-register. 
class N2VDSh op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode> + bit op4, InstrItinClass itin, string OpcodeStr, + ValueType Ty, SDNode OpNode> : N2VImm; class N2VQSh op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode> + bit op4, InstrItinClass itin, string OpcodeStr, + ValueType Ty, SDNode OpNode> : N2VImm; @@ -868,17 +861,17 @@ class N2VLSh op21_16, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm; // Narrow shift by immediate. class N2VNSh op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, string OpcodeStr, ValueType ResTy, - ValueType OpTy, SDNode OpNode> + bit op6, bit op4, InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm; @@ -889,7 +882,7 @@ class N2VDShAdd op21_16, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> : N2VImm; @@ -897,7 +890,7 @@ class N2VQShAdd op21_16, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> : N2VImm; @@ -908,14 +901,14 @@ class N2VDShIns op21_16, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> : N2VImm; class N2VQShIns op21_16, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> : N2VImm; @@ -925,14 +918,14 @@ class N2VCvtD op21_16, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm; class N2VCvtQ op21_16, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm; @@ -966,9 +959,9 @@ multiclass N3V_QHS op11_8, bit op4, multiclass N3VSL_HS op11_8, string OpcodeStr, SDNode ShOp> { def v4i16 : N3VDSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v4i16, ShOp>; - def v2i32 : N3VDSL<0b10, op11_8, !strconcat(OpcodeStr, "32"), v2i32, ShOp>; + def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, !strconcat(OpcodeStr, "32"), v2i32, ShOp>; def v8i16 : N3VQSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v8i16, v4i16, ShOp>; - def v4i32 : N3VQSL<0b10, op11_8, !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>; + def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>; } // ....then also with element size 64 bits: @@ -1016,45 +1009,56 @@ multiclass N2VLInt_QHS op11_8, bit op7, bit op6, // First with only element sizes of 16 and 32 bits: multiclass N3VInt_HS op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { // 64-bit vector types. - def v4i16 : N3VDInt; - def v2i32 : N3VDInt; // 128-bit vector types. 
- def v8i16 : N3VQInt; - def v4i32 : N3VQInt; } -multiclass N3VIntSL_HS op11_8, string OpcodeStr, Intrinsic IntOp> { - def v4i16 : N3VDIntSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v4i16, IntOp>; - def v2i32 : N3VDIntSL<0b10, op11_8, !strconcat(OpcodeStr, "32"), v2i32, IntOp>; - def v8i16 : N3VQIntSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>; - def v4i32 : N3VQIntSL<0b10, op11_8, !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>; +multiclass N3VIntSL_HS op11_8, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, Intrinsic IntOp> { + def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, !strconcat(OpcodeStr, "16"), v4i16, IntOp>; + def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, !strconcat(OpcodeStr, "32"), v2i32, IntOp>; + def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>; + def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>; } // ....then also with element size of 8 bits: multiclass N3VInt_QHS op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> - : N3VInt_HS { - def v8i8 : N3VDInt; - def v16i8 : N3VQInt; + : N3VInt_HS { + def v8i8 : N3VDInt; + def v16i8 : N3VQInt; } // ....then also with element size of 64 bits: multiclass N3VInt_QHSD op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> - : N3VInt_QHS { - def v1i64 : N3VDInt; - def v2i64 : N3VQInt; + : N3VInt_QHS { + def v1i64 : N3VDInt; + def v2i64 : N3VQInt; } @@ -1075,27 +1079,29 @@ multiclass N3VNInt_HSD op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt_HS op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { - def v4i32 : N3VLInt; - def v2i64 : N3VLInt; + InstrItinClass itin, string OpcodeStr, + Intrinsic IntOp, bit Commutable = 0> { + def v4i32 : N3VLInt; + def v2i64 : N3VLInt; } multiclass N3VLIntSL_HS op11_8, - string OpcodeStr, Intrinsic IntOp> { - def v4i16 : N3VLIntSL16 { + def v4i16 : N3VLIntSL16; - def v2i32 : N3VLIntSL; } // ....then also with element size of 8 bits: multiclass N3VLInt_QHS op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> - : N3VLInt_HS { - def v8i16 : N3VLInt; + InstrItinClass itin, string OpcodeStr, + Intrinsic IntOp, bit Commutable = 0> + : N3VLInt_HS { + def v8i16 : N3VLInt; } @@ -1115,32 +1121,37 @@ multiclass N3VWInt_QHS op11_8, bit op4, // Neon Multiply-Op vector operations, // element sizes of 8, 16 and 32 bits: multiclass N3VMulOp_QHS op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, SDNode OpNode> { // 64-bit vector types. - def v8i8 : N3VDMulOp; - def v4i16 : N3VDMulOp; - def v2i32 : N3VDMulOp; // 128-bit vector types. 
- def v16i8 : N3VQMulOp; - def v8i16 : N3VQMulOp; - def v4i32 : N3VQMulOp; } -multiclass N3VMulOpSL_HS op11_8, string OpcodeStr, SDNode ShOp> { - def v4i16 : N3VDMulOpSL16<0b01, op11_8, +multiclass N3VMulOpSL_HS op11_8, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, SDNode ShOp> { + def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, !strconcat(OpcodeStr, "16"), v4i16, mul, ShOp>; - def v2i32 : N3VDMulOpSL<0b10, op11_8, + def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, !strconcat(OpcodeStr, "32"), v2i32, mul, ShOp>; - def v8i16 : N3VQMulOpSL16<0b01, op11_8, + def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, !strconcat(OpcodeStr, "16"), v8i16, v4i16, mul, ShOp>; - def v4i32 : N3VQMulOpSL<0b10, op11_8, + def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, !strconcat(OpcodeStr, "32"), v4i32, v2i32, mul, ShOp>; } @@ -1149,19 +1160,19 @@ multiclass N3VMulOpSL_HS op11_8, string OpcodeStr, SDNode ShOp> { multiclass N3VInt3_QHS op11_8, bit op4, string OpcodeStr, Intrinsic IntOp> { // 64-bit vector types. - def v8i8 : N3VDInt3; - def v4i16 : N3VDInt3; - def v2i32 : N3VDInt3; // 128-bit vector types. - def v16i8 : N3VQInt3; - def v8i16 : N3VQInt3; - def v4i32 : N3VQInt3; } @@ -1171,17 +1182,17 @@ multiclass N3VInt3_QHS op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt3_HS op11_8, bit op4, string OpcodeStr, Intrinsic IntOp> { - def v4i32 : N3VLInt3; - def v2i64 : N3VLInt3; } multiclass N3VLInt3SL_HS op11_8, string OpcodeStr, Intrinsic IntOp> { - def v4i16 : N3VLInt3SL16; - def v2i32 : N3VLInt3SL; } @@ -1189,7 +1200,7 @@ multiclass N3VLInt3SL_HS op11_8, multiclass N3VLInt3_QHS op11_8, bit op4, string OpcodeStr, Intrinsic IntOp> : N3VLInt3_HS { - def v8i16 : N3VLInt3; } @@ -1267,25 +1278,25 @@ multiclass N2VPLInt2_QHS op24_23, bits<2> op21_20, bits<2> op17_16, // Neon 2-register vector shift by immediate, // element sizes of 8, 16, 32 and 64 bits: multiclass N2VSh_QHSD op11_8, bit op4, - string OpcodeStr, SDNode OpNode> { + InstrItinClass itin, string OpcodeStr, SDNode OpNode> { // 64-bit vector types. - def v8i8 : N2VDSh; - def v4i16 : N2VDSh; - def v2i32 : N2VDSh; - def v1i64 : N2VDSh; // 128-bit vector types. 
- def v16i8 : N2VQSh; - def v8i16 : N2VQSh; - def v4i32 : N2VQSh; - def v2i64 : N2VQSh; } @@ -1352,20 +1363,26 @@ defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd.i", add, def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd.f32", v2f32, v2f32, fadd, 1>; def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd.f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, "vaddl.s", int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, "vaddl.u", int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl.s", int_arm_neon_vaddls, 1>; +defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl.u", int_arm_neon_vaddlu, 1>; // VADDW : Vector Add Wide (Q = Q + D) defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>; defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>; // VHADD : Vector Halving Add -defm VHADDs : N3VInt_QHS<0,0,0b0000,0, "vhadd.s", int_arm_neon_vhadds, 1>; -defm VHADDu : N3VInt_QHS<1,0,0b0000,0, "vhadd.u", int_arm_neon_vhaddu, 1>; +defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vhadd.s", int_arm_neon_vhadds, 1>; +defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vhadd.u", int_arm_neon_vhaddu, 1>; // VRHADD : Vector Rounding Halving Add -defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, "vrhadd.s", int_arm_neon_vrhadds, 1>; -defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, "vrhadd.u", int_arm_neon_vrhaddu, 1>; +defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vrhadd.s", int_arm_neon_vrhadds, 1>; +defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vrhadd.u", int_arm_neon_vrhaddu, 1>; // VQADD : Vector Saturating Add -defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, "vqadd.s", int_arm_neon_vqadds, 1>; -defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, "vqadd.u", int_arm_neon_vqaddu, 1>; +defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vqadd.s", int_arm_neon_vqadds, 1>; +defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vqadd.u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) @@ -1376,15 +1393,15 @@ defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>; // VMUL : Vector Multiply (integer, polynomial and floating-point) defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, "vmul.i", mul, 1>; -def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v8i8, v8i8, +def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul.p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; -def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v16i8, v16i8, +def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul.p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul.f32", v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul.f32", v4f32, v4f32, fmul, 1>; defm VMULsl : N3VSL_HS<0b1000, "vmul.i", mul>; -def VMULslfd : N3VDSL<0b10, 0b1001, "vmul.f32", v2f32, fmul>; -def VMULslfq : N3VQSL<0b10, 0b1001, "vmul.f32", v4f32, v2f32, fmul>; +def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul.f32", 
v2f32, fmul>; +def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul.f32", v4f32, v2f32, fmul>; def : Pat<(v8i16 (mul (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), @@ -1405,8 +1422,12 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VQDMULH : Vector Saturating Doubling Multiply Returning High Half -defm VQDMULH : N3VInt_HS<0,0,0b1011,0, "vqdmulh.s", int_arm_neon_vqdmulh, 1>; -defm VQDMULHsl: N3VIntSL_HS<0b1100, "vqdmulh.s", int_arm_neon_vqdmulh>; +defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqdmulh.s", int_arm_neon_vqdmulh, 1>; +defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqdmulh.s", int_arm_neon_vqdmulh>; def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), @@ -1421,8 +1442,12 @@ def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half -defm VQRDMULH : N3VInt_HS<1,0,0b1011,0, "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>; -defm VQRDMULHsl : N3VIntSL_HS<0b1101, "vqrdmulh.s", int_arm_neon_vqrdmulh>; +defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>; +defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqrdmulh.s", int_arm_neon_vqrdmulh>; def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), @@ -1437,26 +1462,28 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, "vmull.s", int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, "vmull.u", int_arm_neon_vmullu, 1>; -def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, "vmull.p8", v8i16, v8i8, +defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls, 1>; +defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu, 1>; +def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull.p8", v8i16, v8i8, int_arm_neon_vmullp, 1>; -defm VMULLsls : N3VLIntSL_HS<0, 0b1010, "vmull.s", int_arm_neon_vmulls>; -defm VMULLslu : N3VLIntSL_HS<1, 0b1010, "vmull.u", int_arm_neon_vmullu>; +defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls>; +defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu>; // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) -defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>; -defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, "vqdmull.s", int_arm_neon_vqdmull>; +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull, 1>; +defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull>; // Vector Multiply-Accumulate and Multiply-Subtract Operations. 
// VMLA : Vector Multiply Accumulate (integer and floating-point) -defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmla.i", add>; -def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v2f32, fmul, fadd>; -def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v4f32, fmul, fadd>; -defm VMLAsl : N3VMulOpSL_HS<0b0000, "vmla.i", add>; -def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, "vmla.f32", v2f32, fmul, fadd>; -def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, "vmla.f32", v4f32, v2f32, fmul, fadd>; +defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>; +def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>; +def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla.f32", v4f32, fmul, fadd>; +defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>; +def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>; +def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla.f32", v4f32, v2f32, fmul, fadd>; def : Pat<(v8i16 (add (v8i16 QPR:$src1), (mul (v8i16 QPR:$src2), @@ -1497,12 +1524,14 @@ defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>; defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal.s", int_arm_neon_vqdmlal>; // VMLS : Vector Multiply Subtract (integer and floating-point) -defm VMLS : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmls.i", sub>; -def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v2f32, fmul, fsub>; -def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v4f32, fmul, fsub>; -defm VMLSsl : N3VMulOpSL_HS<0b0100, "vmls.i", sub>; -def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, "vmls.f32", v2f32, fmul, fsub>; -def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, "vmls.f32", v4f32, v2f32, fmul, fsub>; +defm VMLS : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>; +def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>; +def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls.f32", v4f32, fmul, fsub>; +defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>; +def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>; +def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls.f32", v4f32, v2f32, fmul, fsub>; def : Pat<(v8i16 (sub (v8i16 QPR:$src1), (mul (v8i16 QPR:$src2), @@ -1549,17 +1578,21 @@ defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, "vsub.i", sub, def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub.f32", v2f32, v2f32, fsub, 0>; def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub.f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, "vsubl.s", int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, "vsubl.u", int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl.s", int_arm_neon_vsubls, 1>; +defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl.u", int_arm_neon_vsublu, 1>; // VSUBW : Vector Subtract Wide (Q = Q - D) defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>; defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>; // VHSUB : Vector Halving Subtract -defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, "vhsub.s", int_arm_neon_vhsubs, 0>; -defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, "vhsub.u", int_arm_neon_vhsubu, 0>; +defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, 
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vhsub.s", int_arm_neon_vhsubs, 0>; +defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vhsub.u", int_arm_neon_vhsubu, 0>; // VQSUB : Vector Saturing Subtract -defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, "vqsub.s", int_arm_neon_vqsubs, 0>; -defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, "vqsub.u", int_arm_neon_vqsubu, 0>; +defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vqsub.s", int_arm_neon_vqsubs, 0>; +defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vqsub.u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) @@ -1587,14 +1620,14 @@ defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>; // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) -def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, "vacge.f32", v2i32, v2f32, +def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge.f32", v2i32, v2f32, int_arm_neon_vacged, 0>; -def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, "vacge.f32", v4i32, v4f32, +def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge.f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>; // VACGT : Vector Absolute Compare Greater Than (aka VCAGT) -def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32", v2i32, v2f32, +def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt.f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>; -def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32", v4i32, v4f32, +def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt.f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>; // VTST : Vector Test Bits defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, @@ -1621,7 +1654,7 @@ def VBICd : N3V<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), [(set DPR:$dst, (v2i32 (and DPR:$src1, (vnot_conv DPR:$src2))))]>; def VBICq : N3V<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2), NoItinerary, + (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, "vbic\t$dst, $src1, $src2", "", [(set QPR:$dst, (v4i32 (and QPR:$src1, (vnot_conv QPR:$src2))))]>; @@ -1633,18 +1666,18 @@ def VORNd : N3V<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), [(set DPR:$dst, (v2i32 (or DPR:$src1, (vnot_conv DPR:$src2))))]>; def VORNq : N3V<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2), NoItinerary, + (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, "vorn\t$dst, $src1, $src2", "", [(set QPR:$dst, (v4i32 (or QPR:$src1, (vnot_conv QPR:$src2))))]>; // VMVN : Vector Bitwise NOT def VMVNd : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, - (outs DPR:$dst), (ins DPR:$src), NoItinerary, + (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, "vmvn\t$dst, $src", "", [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>; def VMVNq : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, - (outs QPR:$dst), (ins QPR:$src), NoItinerary, + (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, "vmvn\t$dst, $src", "", [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>; def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>; @@ -1652,13 +1685,13 @@ def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>; // VBSL : Vector Bitwise 
Select def VBSLd : N3V<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR:$src3), NoItinerary, + (ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD, "vbsl\t$dst, $src2, $src3", "$src1 = $dst", [(set DPR:$dst, (v2i32 (or (and DPR:$src2, DPR:$src1), (and DPR:$src3, (vnot_conv DPR:$src1)))))]>; def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, QPR:$src3), NoItinerary, + (ins QPR:$src1, QPR:$src2, QPR:$src3), IIC_VCNTiQ, "vbsl\t$dst, $src2, $src3", "$src1 = $dst", [(set QPR:$dst, (v4i32 (or (and QPR:$src2, QPR:$src1), @@ -1675,16 +1708,18 @@ def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), // Vector Absolute Differences. // VABD : Vector Absolute Difference -defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, "vabd.s", int_arm_neon_vabds, 0>; -defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, "vabd.u", int_arm_neon_vabdu, 0>; -def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v2f32, v2f32, +defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vabd.s", int_arm_neon_vabds, 0>; +defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vabd.u", int_arm_neon_vabdu, 0>; +def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, "vabd.f32", v2f32, v2f32, int_arm_neon_vabds, 0>; -def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v4f32, v4f32, +def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vabd.f32", v4f32, v4f32, int_arm_neon_vabds, 0>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, "vabdl.s", int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, "vabdl.u", int_arm_neon_vabdlu, 0>; +defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, "vabdl.s", int_arm_neon_vabdls, 0>; +defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, "vabdl.u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate defm VABAs : N3VInt3_QHS<0,1,0b0101,0, "vaba.s", int_arm_neon_vabas>; @@ -1697,31 +1732,35 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. 
 // VMAX : Vector Maximum
-defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, "vmax.s", int_arm_neon_vmaxs, 1>;
-defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, "vmax.u", int_arm_neon_vmaxu, 1>;
-def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v2f32, v2f32,
+defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+                        IIC_VBINi4Q, "vmax.s", int_arm_neon_vmaxs, 1>;
+defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+                        IIC_VBINi4Q, "vmax.u", int_arm_neon_vmaxu, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax.f32", v2f32, v2f32,
                      int_arm_neon_vmaxs, 1>;
-def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v4f32, v4f32,
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax.f32", v4f32, v4f32,
                      int_arm_neon_vmaxs, 1>;
 // VMIN : Vector Minimum
-defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, "vmin.s", int_arm_neon_vmins, 1>;
-defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, "vmin.u", int_arm_neon_vminu, 1>;
-def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v2f32, v2f32,
+defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+                        IIC_VBINi4Q, "vmin.s", int_arm_neon_vmins, 1>;
+defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+                        IIC_VBINi4Q, "vmin.u", int_arm_neon_vminu, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin.f32", v2f32, v2f32,
                      int_arm_neon_vmins, 1>;
-def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v4f32, v4f32,
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin.f32", v4f32, v4f32,
                      int_arm_neon_vmins, 1>;

 // Vector Pairwise Operations.

 // VPADD : Vector Pairwise Add
-def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, "vpadd.i8", v8i8, v8i8,
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd.i8", v8i8, v8i8,
                       int_arm_neon_vpadd, 0>;
-def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, "vpadd.i16", v4i16, v4i16,
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd.i16", v4i16, v4i16,
                        int_arm_neon_vpadd, 0>;
-def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, "vpadd.i32", v2i32, v2i32,
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd.i32", v2i32, v2i32,
                        int_arm_neon_vpadd, 0>;
-def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, "vpadd.f32", v2f32, v2f32,
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd.f32", v2f32, v2f32,
                      int_arm_neon_vpadd, 0>;

 // VPADDL : Vector Pairwise Add Long
@@ -1737,35 +1776,35 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpadal.u",
                              int_arm_neon_vpadalu>;

 // VPMAX : Vector Pairwise Maximum
-def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, "vpmax.s8", v8i8, v8i8,
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.s8", v8i8, v8i8,
                       int_arm_neon_vpmaxs, 0>;
-def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, "vpmax.s16", v4i16, v4i16,
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.s16", v4i16, v4i16,
                        int_arm_neon_vpmaxs, 0>;
-def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, "vpmax.s32", v2i32, v2i32,
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.s32", v2i32, v2i32,
                        int_arm_neon_vpmaxs, 0>;
-def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, "vpmax.u8", v8i8, v8i8,
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.u8", v8i8, v8i8,
                       int_arm_neon_vpmaxu, 0>;
-def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, "vpmax.u16", v4i16, v4i16,
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.u16", v4i16, v4i16,
                        int_arm_neon_vpmaxu, 0>;
-def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, "vpmax.u32", v2i32, v2i32,
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.u32", v2i32, v2i32,
                        int_arm_neon_vpmaxu, 0>;
-def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, "vpmax.f32", v2f32, v2f32,
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax.f32", v2f32, v2f32,
                      int_arm_neon_vpmaxs, 0>;

 // VPMIN : Vector Pairwise Minimum
-def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, "vpmin.s8", v8i8, v8i8,
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.s8", v8i8, v8i8,
                       int_arm_neon_vpmins, 0>;
-def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, "vpmin.s16", v4i16, v4i16,
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.s16", v4i16, v4i16,
                        int_arm_neon_vpmins, 0>;
-def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, "vpmin.s32", v2i32, v2i32,
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.s32", v2i32, v2i32,
                        int_arm_neon_vpmins, 0>;
-def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, "vpmin.u8", v8i8, v8i8,
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.u8", v8i8, v8i8,
                       int_arm_neon_vpminu, 0>;
-def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, "vpmin.u16", v4i16, v4i16,
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.u16", v4i16, v4i16,
                        int_arm_neon_vpminu, 0>;
-def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, "vpmin.u32", v2i32, v2i32,
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.u32", v2i32, v2i32,
                        int_arm_neon_vpminu, 0>;
-def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, "vpmin.f32", v2f32, v2f32,
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin.f32", v2f32, v2f32,
                      int_arm_neon_vpmins, 0>;

 // Vector Reciprocal and Reciprocal Square Root Estimate and Step.
@@ -1785,9 +1824,9 @@ def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        v4f32, v4f32, int_arm_neon_vrecpe>;

 // VRECPS : Vector Reciprocal Step
-def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v2f32, v2f32,
+def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSD, "vrecps.f32", v2f32, v2f32,
                        int_arm_neon_vrecps, 1>;
-def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v4f32, v4f32,
+def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSQ, "vrecps.f32", v4f32, v4f32,
                        int_arm_neon_vrecps, 1>;

 // VRSQRTE : Vector Reciprocal Square Root Estimate
@@ -1805,21 +1844,23 @@ def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         v4f32, v4f32, int_arm_neon_vrsqrte>;

 // VRSQRTS : Vector Reciprocal Square Root Step
-def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v2f32, v2f32,
+def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSD, "vrsqrts.f32", v2f32, v2f32,
                         int_arm_neon_vrsqrts, 1>;
-def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v4f32, v4f32,
+def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSQ, "vrsqrts.f32", v4f32, v4f32,
                         int_arm_neon_vrsqrts, 1>;

 // Vector Shifts.
// VSHL : Vector Shift -defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, "vshl.s", int_arm_neon_vshifts, 0>; -defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, "vshl.u", int_arm_neon_vshiftu, 0>; +defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, + IIC_VSHLiQ, "vshl.s", int_arm_neon_vshifts, 0>; +defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, + IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>; // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VSh_QHSD<0, 1, 0b0111, 1, "vshl.i", NEONvshl>; +defm VSHLi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLiD, "vshl.i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, "vshr.s", NEONvshrs>; -defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, "vshr.u", NEONvshru>; +defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>; +defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>; // VSHLL : Vector Shift Left Long def VSHLLs8 : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8", @@ -1844,86 +1885,90 @@ def VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, "vshrn.i16", - v8i8, v8i16, NEONvshrn>; -def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, "vshrn.i32", - v4i16, v4i32, NEONvshrn>; -def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, "vshrn.i64", - v2i32, v2i64, NEONvshrn>; +def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, + IIC_VSHLiD, "vshrn.i16", v8i8, v8i16, NEONvshrn>; +def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, + IIC_VSHLiD, "vshrn.i32", v4i16, v4i32, NEONvshrn>; +def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, + IIC_VSHLiD, "vshrn.i64", v2i32, v2i64, NEONvshrn>; // VRSHL : Vector Rounding Shift -defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, "vrshl.s", int_arm_neon_vrshifts, 0>; -defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, "vrshl.u", int_arm_neon_vrshiftu, 0>; +defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vrshl.s", int_arm_neon_vrshifts, 0>; +defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vrshl.u", int_arm_neon_vrshiftu, 0>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, "vrshr.s", NEONvrshrs>; -defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, "vrshr.u", NEONvrshru>; +defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>; +defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow -def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, "vrshrn.i16", - v8i8, v8i16, NEONvrshrn>; -def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, "vrshrn.i32", - v4i16, v4i32, NEONvrshrn>; -def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, "vrshrn.i64", - v2i32, v2i64, NEONvrshrn>; +def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vrshrn.i16", v8i8, v8i16, NEONvrshrn>; +def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vrshrn.i32", v4i16, v4i32, NEONvrshrn>; +def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vrshrn.i64", v2i32, v2i64, NEONvrshrn>; // VQSHL : Vector Saturating Shift -defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, "vqshl.s", int_arm_neon_vqshifts, 0>; -defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, "vqshl.u", int_arm_neon_vqshiftu, 0>; +defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + 
IIC_VSHLi4Q, "vqshl.s", int_arm_neon_vqshifts, 0>; +defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vqshl.u", int_arm_neon_vqshiftu, 0>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, "vqshl.s", NEONvqshls>; -defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, "vqshl.u", NEONvqshlu>; +defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.s", NEONvqshls>; +defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>; // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, "vqshlu.s", NEONvqshlsu>; +defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow -def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.s16", - v8i8, v8i16, NEONvqshrns>; -def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.s32", - v4i16, v4i32, NEONvqshrns>; -def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.s64", - v2i32, v2i64, NEONvqshrns>; -def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.u16", - v8i8, v8i16, NEONvqshrnu>; -def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.u32", - v4i16, v4i32, NEONvqshrnu>; -def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.u64", - v2i32, v2i64, NEONvqshrnu>; +def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.s16", v8i8, v8i16, NEONvqshrns>; +def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.s32", v4i16, v4i32, NEONvqshrns>; +def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.s64", v2i32, v2i64, NEONvqshrns>; +def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.u16", v8i8, v8i16, NEONvqshrnu>; +def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.u32", v4i16, v4i32, NEONvqshrnu>; +def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.u64", v2i32, v2i64, NEONvqshrnu>; // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) -def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, "vqshrun.s16", - v8i8, v8i16, NEONvqshrnsu>; -def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, "vqshrun.s32", - v4i16, v4i32, NEONvqshrnsu>; -def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, "vqshrun.s64", - v2i32, v2i64, NEONvqshrnsu>; +def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, + IIC_VSHLi4D, "vqshrun.s16", v8i8, v8i16, NEONvqshrnsu>; +def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, + IIC_VSHLi4D, "vqshrun.s32", v4i16, v4i32, NEONvqshrnsu>; +def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, + IIC_VSHLi4D, "vqshrun.s64", v2i32, v2i64, NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift -defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, "vqrshl.s", - int_arm_neon_vqrshifts, 0>; -defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, "vqrshl.u", - int_arm_neon_vqrshiftu, 0>; +defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vqrshl.s", int_arm_neon_vqrshifts, 0>; +defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>; // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow -def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.s16", - v8i8, v8i16, NEONvqrshrns>; -def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.s32", - v4i16, 
v4i32, NEONvqrshrns>; -def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.s64", - v2i32, v2i64, NEONvqrshrns>; -def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.u16", - v8i8, v8i16, NEONvqrshrnu>; -def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.u32", - v4i16, v4i32, NEONvqrshrnu>; -def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.u64", - v2i32, v2i64, NEONvqrshrnu>; +def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.s16", v8i8, v8i16, NEONvqrshrns>; +def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.s32", v4i16, v4i32, NEONvqrshrns>; +def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.s64", v2i32, v2i64, NEONvqrshrns>; +def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.u16", v8i8, v8i16, NEONvqrshrnu>; +def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.u32", v4i16, v4i32, NEONvqrshrnu>; +def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.u64", v2i32, v2i64, NEONvqrshrnu>; // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) -def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, "vqrshrun.s16", - v8i8, v8i16, NEONvqrshrnsu>; -def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, "vqrshrun.s32", - v4i16, v4i32, NEONvqrshrnsu>; -def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, "vqrshrun.s64", - v2i32, v2i64, NEONvqrshrnsu>; +def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vqrshrun.s16", v8i8, v8i16, NEONvqrshrnsu>; +def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vqrshrun.s32", v4i16, v4i32, NEONvqrshrnsu>; +def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vqrshrun.s64", v2i32, v2i64, NEONvqrshrnsu>; // VSRA : Vector Shift Right and Accumulate defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>; @@ -1962,13 +2007,11 @@ def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>; class VNEGD size, string OpcodeStr, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set DPR:$dst, (Ty (vneg DPR:$src)))]>; class VNEGQ size, string OpcodeStr, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set QPR:$dst, (Ty (vneg QPR:$src)))]>; // VNEG : Vector Negate @@ -1981,11 +2024,11 @@ def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>; // VNEG : Vector Negate (floating-point) def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, - (outs DPR:$dst), (ins DPR:$src), NoItinerary, + (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, "vneg.f32\t$dst, $src", "", [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>; def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, - (outs QPR:$dst), (ins QPR:$src), NoItinerary, + (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ, "vneg.f32\t$dst, $src", "", [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>; @@ -2024,9 +2067,9 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, // VMOV : Vector Move (Register) def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), - NoItinerary, "vmov\t$dst, $src", "", []>; + IIC_VMOVD, "vmov\t$dst, $src", "", []>; def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs 
QPR:$dst), (ins QPR:$src),
-              NoItinerary, "vmov\t$dst, $src", "", []>;
+              IIC_VMOVD, "vmov\t$dst, $src", "", []>;
// VMOV : Vector Move (Immediate)
@@ -2066,38 +2109,38 @@ def vmovImm64 : PatLeaf<(build_vector), [{
// be encoded based on the immed values.
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
-                        (ins i8imm:$SIMM), NoItinerary,
+                        (ins i8imm:$SIMM), IIC_VMOVImm,
                        "vmov.i8\t$dst, $SIMM", "",
                        [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
-                        (ins i8imm:$SIMM), NoItinerary,
+                        (ins i8imm:$SIMM), IIC_VMOVImm,
                        "vmov.i8\t$dst, $SIMM", "",
                        [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst),
-                        (ins i16imm:$SIMM), NoItinerary,
+                        (ins i16imm:$SIMM), IIC_VMOVImm,
                        "vmov.i16\t$dst, $SIMM", "",
                        [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst),
-                        (ins i16imm:$SIMM), NoItinerary,
+                        (ins i16imm:$SIMM), IIC_VMOVImm,
                        "vmov.i16\t$dst, $SIMM", "",
                        [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst),
-                        (ins i32imm:$SIMM), NoItinerary,
+                        (ins i32imm:$SIMM), IIC_VMOVImm,
                        "vmov.i32\t$dst, $SIMM", "",
                        [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst),
-                        (ins i32imm:$SIMM), NoItinerary,
+                        (ins i32imm:$SIMM), IIC_VMOVImm,
                        "vmov.i32\t$dst, $SIMM", "",
                        [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
-                        (ins i64imm:$SIMM), NoItinerary,
+                        (ins i64imm:$SIMM), IIC_VMOVImm,
                        "vmov.i64\t$dst, $SIMM", "",
                        [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
-                        (ins i64imm:$SIMM), NoItinerary,
+                        (ins i64imm:$SIMM), IIC_VMOVImm,
                        "vmov.i64\t$dst, $SIMM", "",
                        [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
@@ -2105,27 +2148,27 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
def VGETLNs8 : NVGetLane<0b11100101, 0b1011, 0b00,
                        (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                        NoItinerary, "vmov", ".s8\t$dst, $src[$lane]",
+                        IIC_VMOVSI, "vmov", ".s8\t$dst, $src[$lane]",
                        [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01,
                        (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                        NoItinerary, "vmov", ".s16\t$dst, $src[$lane]",
+                        IIC_VMOVSI, "vmov", ".s16\t$dst, $src[$lane]",
                        [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNu8 : NVGetLane<0b11101101, 0b1011, 0b00,
                        (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                        NoItinerary, "vmov", ".u8\t$dst, $src[$lane]",
+                        IIC_VMOVSI, "vmov", ".u8\t$dst, $src[$lane]",
                        [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01,
                        (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                        NoItinerary, "vmov", ".u16\t$dst, $src[$lane]",
+                        IIC_VMOVSI, "vmov", ".u16\t$dst, $src[$lane]",
                        [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00,
                        (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                        NoItinerary, "vmov", ".32\t$dst, $src[$lane]",
+                        IIC_VMOVSI, "vmov", ".32\t$dst, $src[$lane]",
                        [(set GPR:$dst, (extractelt (v2i32 DPR:$src), imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
@@ -2166,17 +2209,17 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
let Constraints = "$src1 = $dst" in {
def VSETLNi8 : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst),
                        (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
-                        NoItinerary, "vmov", ".8\t$dst[$lane], $src2",
+                        IIC_VMOVISL, "vmov", ".8\t$dst[$lane], $src2",
                        [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1), GPR:$src2, imm:$lane))]>;
def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst),
                        (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
-                        NoItinerary, "vmov", ".16\t$dst[$lane], $src2",
+                        IIC_VMOVISL, "vmov", ".16\t$dst[$lane], $src2",
                        [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1), GPR:$src2, imm:$lane))]>;
def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst),
                        (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
-                        NoItinerary, "vmov", ".32\t$dst[$lane], $src2",
+                        IIC_VMOVISL, "vmov", ".32\t$dst[$lane], $src2",
                        [(set DPR:$dst, (insertelt (v2i32 DPR:$src1), GPR:$src2, imm:$lane))]>;
}
@@ -2242,11 +2285,11 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
class VDUPD opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
  : NVDup;
class VDUPQ opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
  : NVDup;
def VDUP8d : VDUPD<0b11101100, 0b00, ".8", v8i8>;
@@ -2257,11 +2300,11 @@ def VDUP16q : VDUPQ<0b11101010, 0b01, ".16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>;
def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
-                  NoItinerary, "vdup", ".32\t$dst, $src",
+                  IIC_VMOVIS, "vdup", ".32\t$dst, $src",
                  [(set DPR:$dst, (v2f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;
def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
-                  NoItinerary, "vdup", ".32\t$dst, $src",
+                  IIC_VMOVIS, "vdup", ".32\t$dst, $src",
                  [(set QPR:$dst, (v4f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>;
@@ -2269,14 +2312,14 @@ def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
class VDUPLND op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
-        (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), NoItinerary,
+        (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
        [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
class VDUPLNQ op19_18, bits<2> op17_16, string OpcodeStr, ValueType ResTy, ValueType OpTy>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
-        (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), NoItinerary,
+        (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
        [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;
@@ -2308,12 +2351,12 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
def VDUPfdf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0,
                  (outs DPR:$dst), (ins SPR:$src),
-                  NoItinerary, "vdup.32\t$dst, ${src:lane}", "",
+                  IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
                  [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def VDUPfqf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0,
                  (outs QPR:$dst), (ins SPR:$src),
-                  NoItinerary, "vdup.32\t$dst, ${src:lane}", "",
+                  IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
                  [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
@@ -2387,12 +2430,12 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32",
class VREV64D op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
-        (ins DPR:$src), NoItinerary,
+        (ins DPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
class VREV64Q op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src), NoItinerary,
+        (ins QPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;
@@ -2410,12 +2453,12 @@ def VREV64qf : VREV64Q<0b10, "vrev64.32", v4f32>;
class VREV32D op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
-        (ins DPR:$src), NoItinerary,
+        (ins DPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
class VREV32Q op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src), NoItinerary,
+        (ins QPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;
@@ -2429,12 +2472,12 @@ def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>;
class VREV16D op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
-        (ins DPR:$src), NoItinerary,
+        (ins DPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
class VREV16Q op19_18, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
-        (ins QPR:$src), NoItinerary,
+        (ins QPR:$src), IIC_VMOVD,
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;
@@ -2447,14 +2490,14 @@ def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
class VEXTd
  : N3V<0,1,0b11,0b0000,0,0, (outs DPR:$dst),
-        (ins DPR:$lhs, DPR:$rhs, i32imm:$index), NoItinerary,
+        (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
        !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
        [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), (Ty DPR:$rhs), imm:$index)))]>;
class VEXTq
  : N3V<0,1,0b11,0b0000,1,0, (outs QPR:$dst),
-        (ins QPR:$lhs, QPR:$rhs, i32imm:$index), NoItinerary,
+        (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
        !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
        [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs), (Ty QPR:$rhs), imm:$index)))]>;
@@ -2504,24 +2547,24 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">;
// VTBL : Vector Table Lookup
def VTBL1 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
-        (ins DPR:$tbl1, DPR:$src), NoItinerary,
+        (ins DPR:$tbl1, DPR:$src), IIC_VTB1,
        "vtbl.8\t$dst, \\{$tbl1\\}, $src", "",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
-        (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NoItinerary,
+        (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2,
        "vtbl.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2 DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
-        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NoItinerary,
+        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3,
        "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
-        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NoItinerary,
+        (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4,
        "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
@@ -2529,25 +2572,25 @@ def VTBL4
// VTBX : Vector Table Extension
def VTBX1 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
-        (ins DPR:$orig, DPR:$tbl1, DPR:$src), NoItinerary,
+        (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1,
        "vtbx.8\t$dst, \\{$tbl1\\}, $src", "$orig = $dst",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1 DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
-        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NoItinerary,
+        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2,
        "vtbx.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
-        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NoItinerary,
+        (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3,
        "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBX4 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
-        DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NoItinerary,
+        DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4,
        "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst",
        [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
@@ -2576,11 +2619,11 @@ def : N3VDsPat;
// Vector Multiply-Accumulate/Subtract used for single-precision FP
let neverHasSideEffects = 1 in
-def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, "vmla.f32", v2f32,fmul,fadd>;
+def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
def : N3VDMulOpsPat;
let neverHasSideEffects = 1 in
-def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, "vmls.f32", v2f32,fmul,fsub>;
+def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
def : N3VDMulOpsPat;
// Vector Absolute used for single-precision FP
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index a476df0b2b4..4ecc79d7a11 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -225,16 +225,16 @@ def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
//
def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
-                    IIC_fpMOVSI, "fmrs", " $dst, $src",
+                    IIC_VMOVSI, "fmrs", " $dst, $src",
                    [(set GPR:$dst, (bitconvert SPR:$src))]>;
def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
-                    IIC_fpMOVIS, "fmsr", " $dst, $src",
+                    IIC_VMOVIS, "fmsr", " $dst, $src",
                    [(set SPR:$dst, (bitconvert GPR:$src))]>;
def FMRRD : AVConv3I<0b11000101, 0b1011,
                    (outs GPR:$dst1, GPR:$dst2), (ins DPR:$src),
-                    IIC_fpMOVDI, "fmrrd", " $dst1, $dst2, $src",
+                    IIC_VMOVDI, "fmrrd", " $dst1, $dst2, $src",
                    [/* FIXME: Can't write pattern for multiple result instr*/]>;
// FMDHR: GPR -> SPR
@@ -242,7 +242,7 @@ def FMRRD : AVConv3I<0b11000101, 0b1011,
def FMDRR : AVConv5I<0b11000100, 0b1011,
                    (outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
-                    IIC_fpMOVID, "fmdrr", " $dst, $src1, $src2",
+                    IIC_VMOVID, "fmdrr", " $dst, $src1, $src2",
                    [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
// FMRDH: SPR -> GPR
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 4dc369ad5f0..fc4c5f5830b 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -63,10 +63,6 @@ def IIC_iStoresiu : InstrItinClass;
def IIC_iStorem : InstrItinClass;
def IIC_Br : InstrItinClass;
def IIC_fpSTAT : InstrItinClass;
-def IIC_fpMOVIS : InstrItinClass;
-def IIC_fpMOVID : InstrItinClass;
-def IIC_fpMOVSI : InstrItinClass;
-def IIC_fpMOVDI : InstrItinClass;
def IIC_fpUNA32 : InstrItinClass;
def IIC_fpUNA64 : InstrItinClass;
def IIC_fpCMP32 : InstrItinClass;
@@ -102,11 +98,21 @@ def IIC_VUNAD : InstrItinClass;
def IIC_VUNAQ : InstrItinClass;
def IIC_VBIND : InstrItinClass;
def IIC_VBINQ : InstrItinClass;
+def IIC_VMOVImm : InstrItinClass;
def IIC_VMOVD : InstrItinClass;
def IIC_VMOVQ : InstrItinClass;
+def IIC_VMOVIS : InstrItinClass;
+def IIC_VMOVID : InstrItinClass;
+def IIC_VMOVISL : InstrItinClass;
+def IIC_VMOVSI : InstrItinClass;
+def IIC_VMOVDI : InstrItinClass;
def IIC_VPERMD : InstrItinClass;
def IIC_VPERMQ : InstrItinClass;
def IIC_VPERMQ3 : InstrItinClass;
+def IIC_VMACD : InstrItinClass;
+def IIC_VMACQ : InstrItinClass;
+def IIC_VRECSD : InstrItinClass;
+def IIC_VRECSQ : InstrItinClass;
def IIC_VCNTiD : InstrItinClass;
def IIC_VCNTiQ : InstrItinClass;
def IIC_VUNAiD : InstrItinClass;
@@ -119,10 +125,30 @@ def IIC_VSUBiD : InstrItinClass;
def IIC_VSUBiQ : InstrItinClass;
def IIC_VBINi4D : InstrItinClass;
def IIC_VBINi4Q : InstrItinClass;
+def IIC_VSHLiD : InstrItinClass;
+def IIC_VSHLiQ : InstrItinClass;
+def IIC_VSHLi4D : InstrItinClass;
+def IIC_VSHLi4Q : InstrItinClass;
+def IIC_VPALiD : InstrItinClass;
+def IIC_VPALiQ : InstrItinClass;
def IIC_VMULi16D : InstrItinClass;
def IIC_VMULi32D : InstrItinClass;
def IIC_VMULi16Q : InstrItinClass;
def IIC_VMULi32Q : InstrItinClass;
+def IIC_VMACi16D : InstrItinClass;
+def IIC_VMACi32D : InstrItinClass;
+def IIC_VMACi16Q : InstrItinClass;
+def IIC_VMACi32Q : InstrItinClass;
+def IIC_VEXTD : InstrItinClass;
+def IIC_VEXTQ : InstrItinClass;
+def IIC_VTB1 : InstrItinClass;
+def IIC_VTB2 : InstrItinClass;
+def IIC_VTB3 : InstrItinClass;
+def IIC_VTB4 : InstrItinClass;
+def IIC_VTBX1 : InstrItinClass;
+def IIC_VTBX2 : InstrItinClass;
+def IIC_VTBX3 : InstrItinClass;
+def IIC_VTBX4 : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
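The declarations above only introduce names; an itinerary class takes effect once a processor model binds it to pipeline stages and per-operand cycles, which is what the ARMScheduleV7.td hunks below do for Cortex-A8. The following sketch illustrates the shape of such a binding; ExampleItineraries is a made-up name, and the functional-unit choice and cycle counts are placeholders for illustration, not values taken from this patch.

// Illustration only: one InstrItinData ties an itinerary class to a list of
// pipeline stages plus a list of per-operand cycles (result first, then each
// source operand).
def ExampleItineraries : ProcessorItineraries<[
  // Two cycles in the NEON load/store-permute pipe; the result is available
  // in cycle 3 and both source operands are read in cycle 1.
  InstrItinData<IIC_VEXTQ, [InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>
]>;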
diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td
index 33af58a1e21..e5658139523 100644
--- a/lib/Target/ARM/ARMScheduleV7.td
+++ b/lib/Target/ARM/ARMScheduleV7.td
@@ -171,23 +171,7 @@ def CortexA8Itineraries : ProcessorItineraries<[
  //
  // FP Special Register to Integer Register File Move
  InstrItinData,
-                InstrStage<1, [FU_NLSPipe], 1>]>,
-  //
-  // Integer to Single-Precision FP Register File Move
-  InstrItinData,
-                InstrStage<1, [FU_NLSPipe], 1>]>,
-  //
-  // Integer to Double-Precision FP Register File Move
-  InstrItinData,
-                InstrStage<1, [FU_NLSPipe], 1>]>,
-  //
-  // Single-Precision FP to Integer Register File Move
-  InstrItinData,
-                InstrStage<1, [FU_NLSPipe], 1>], [20, 1]>,
-  //
-  // Double-Precision FP to Integer Register File Move
-  InstrItinData,
-                InstrStage<1, [FU_NLSPipe], 1>], [20, 20, 1]>,
+                InstrStage<1, [FU_NLSPipe]>]>,
  //
  // Single-precision FP Unary
  InstrItinData,
@@ -385,6 +369,10 @@ def CortexA8Itineraries : ProcessorItineraries<[
  InstrItinData,
                InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
  //
+  // Move Immediate
+  InstrItinData,
+                InstrStage<1, [FU_NPipe]>], [3]>,
+  //
  // Double-register Permute Move
  InstrItinData,
                InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
@@ -395,6 +383,26 @@ def CortexA8Itineraries : ProcessorItineraries<[
  InstrItinData,
                InstrStage<2, [FU_NLSPipe]>], [3, 1]>,
  //
+  // Integer to Single-precision Move
+  InstrItinData,
+                InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
+  //
+  // Integer to Double-precision Move
+  InstrItinData,
+                InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>,
+  //
+  // Single-precision to Integer Move
+  InstrItinData,
+                InstrStage<1, [FU_NLSPipe]>], [20, 1]>,
+  //
+  // Double-precision to Integer Move
+  InstrItinData,
+                InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>,
+  //
+  // Integer to Lane Move
+  InstrItinData,
+                InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>,
+  //
  // Double-register Permute
  InstrItinData,
                InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>,
@@ -413,15 +421,33 @@ def CortexA8Itineraries : ProcessorItineraries<[
                InstrStage<1, [FU_NPipe], 0>,
                InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>,
  //
+  // Double-register FP Multiply-Accumulate
+  InstrItinData,
+                InstrStage<1, [FU_NPipe]>], [9, 2, 2, 3]>,
+  //
+  // Quad-register FP Multiply-Accumulate
+  // Result written in N9, but that is relative to the last cycle of multicycle,
+  // so we use 10 for those cases
+  InstrItinData,
+                InstrStage<2, [FU_NPipe]>], [10, 2, 2, 3]>,
+  //
+  // Double-register Reciprocal Step
+  InstrItinData,
+                InstrStage<1, [FU_NPipe]>], [9, 2, 2]>,
+  //
+  // Quad-register Reciprocal Step
+  InstrItinData,
+                InstrStage<2, [FU_NPipe]>], [10, 2, 2]>,
+  //
  // Double-register Integer Count
  InstrItinData,
-                InstrStage<1, [FU_NPipe]>], [3, 2]>,
+                InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
  //
  // Quad-register Integer Count
  // Result written in N3, but that is relative to the last cycle of multicycle,
  // so we use 4 for those cases
  InstrItinData,
-                InstrStage<2, [FU_NPipe]>], [4, 2]>,
+                InstrStage<2, [FU_NPipe]>], [4, 2, 2]>,
  //
  // Double-register Integer Unary
  InstrItinData,
@@ -463,6 +489,30 @@ def CortexA8Itineraries : ProcessorItineraries<[
  InstrItinData,
                InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
  //
+  // Double-register Integer Shift
+  InstrItinData,
+                InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
+  //
+  // Quad-register Integer Shift
+  InstrItinData,
+                InstrStage<2, [FU_NPipe]>], [4, 1, 1]>,
+  //
+  // Double-register Integer Shift (4 cycle)
+  InstrItinData,
+                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
+  //
+  // Quad-register Integer Shift (4 cycle)
+  InstrItinData,
+                InstrStage<2, [FU_NPipe]>], [5, 1, 1]>,
+  //
+  // Double-register Integer Pair Add Long
+  InstrItinData,
+                InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
+  //
+  // Quad-register Integer Pair Add Long
+  InstrItinData,
+                InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
+  //
  // Double-register Integer Multiply (.8, .16)
  InstrItinData,
                InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
@@ -479,7 +529,59 @@ def CortexA8Itineraries : ProcessorItineraries<[
  InstrItinData,
                InstrStage<1, [FU_NPipe]>,
                InstrStage<2, [FU_NLSPipe], 0>,
-                InstrStage<3, [FU_NPipe]>], [9, 2, 1]>
-
-
+                InstrStage<3, [FU_NPipe]>], [9, 2, 1]>,
+  //
+  // Double-register Integer Multiply-Accumulate (.8, .16)
+  InstrItinData,
+                InstrStage<1, [FU_NPipe]>], [6, 2, 2, 3]>,
+  //
+  // Double-register Integer Multiply-Accumulate (.32)
+  InstrItinData,
+                InstrStage<2, [FU_NPipe]>], [7, 2, 1, 3]>,
+  //
+  // Quad-register Integer Multiply-Accumulate (.8, .16)
+  InstrItinData,
+                InstrStage<2, [FU_NPipe]>], [7, 2, 2, 3]>,
+  //
+  // Quad-register Integer Multiply-Accumulate (.32)
+  InstrItinData,
+                InstrStage<1, [FU_NPipe]>,
+                InstrStage<2, [FU_NLSPipe], 0>,
+                InstrStage<3, [FU_NPipe]>], [9, 2, 1, 3]>,
+  //
+  // Double-register VEXT
+  InstrItinData,
+                InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>,
+  //
+  // Quad-register VEXT
+  InstrItinData,
+                InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>,
+  //
+  // VTB
+  InstrItinData,
+                InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>,
+  InstrItinData,
+                InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>,
+  InstrItinData,
+                InstrStage<1, [FU_NLSPipe]>,
+                InstrStage<1, [FU_NPipe], 0>,
+                InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>,
+  InstrItinData,
+                InstrStage<1, [FU_NLSPipe]>,
+                InstrStage<1, [FU_NPipe], 0>,
+                InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>,
+  //
+  // VTBX
+  InstrItinData,
+                InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>,
+  InstrItinData,
+                InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>,
+  InstrItinData,
+                InstrStage<1, [FU_NLSPipe]>,
+                InstrStage<1, [FU_NPipe], 0>,
+                InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
+  InstrItinData,
+                InstrStage<1, [FU_NLSPipe]>,
+                InstrStage<1, [FU_NPipe], 0>,
+                InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
]>;
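For readers mapping the operand-cycle lists back to the instruction definitions: the list is read positionally, result operands first and then source operands in the order the instruction declares them. A hedged illustration using VTBX1, whose definition earlier in this patch is (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$src); the first InstrStage below is a placeholder, and only the FU_NLSPipe stage and the [3, 1, 2, 1] list mirror the VTBX1 entry above.

// Illustration only: $dst ready in cycle 3, $orig read in cycle 1,
// $tbl1 read in cycle 2, $src read in cycle 1.
def ExampleVTBX1Itin : ProcessorItineraries<[
  InstrItinData<IIC_VTBX1, [InstrStage<1, [FU_NPipe], 0>,
                            InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>
]>;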