mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[AArch64][SVE] Lower MULHU/MULHS nodes to umulh/smulh instructions
Mark MULHS/MULHU nodes as legal for both scalable and fixed SVE types, and lower them to the appropriate SVE instructions. Additionally now that the MULH nodes are legal, integer divides can be expanded into a more performant code sequence. Differential Revision: https://reviews.llvm.org/D100487
This commit is contained in:
parent
7d693d1c6d
commit
935e65cc31
@ -1126,6 +1126,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::MGATHER, VT, Custom);
|
||||
setOperationAction(ISD::MSCATTER, VT, Custom);
|
||||
setOperationAction(ISD::MUL, VT, Custom);
|
||||
setOperationAction(ISD::MULHS, VT, Custom);
|
||||
setOperationAction(ISD::MULHU, VT, Custom);
|
||||
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
|
||||
setOperationAction(ISD::SELECT, VT, Custom);
|
||||
setOperationAction(ISD::SETCC, VT, Custom);
|
||||
@ -1149,8 +1151,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
|
||||
setOperationAction(ISD::STEP_VECTOR, VT, Custom);
|
||||
|
||||
setOperationAction(ISD::MULHU, VT, Expand);
|
||||
setOperationAction(ISD::MULHS, VT, Expand);
|
||||
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
|
||||
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
|
||||
}
|
||||
@ -1259,6 +1259,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
|
||||
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
|
||||
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
|
||||
setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
|
||||
setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
|
||||
setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
|
||||
setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
|
||||
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
|
||||
setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
|
||||
@ -1453,6 +1457,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
|
||||
setOperationAction(ISD::FTRUNC, VT, Custom);
|
||||
setOperationAction(ISD::LOAD, VT, Custom);
|
||||
setOperationAction(ISD::MUL, VT, Custom);
|
||||
setOperationAction(ISD::MULHS, VT, Custom);
|
||||
setOperationAction(ISD::MULHU, VT, Custom);
|
||||
setOperationAction(ISD::OR, VT, Custom);
|
||||
setOperationAction(ISD::SDIV, VT, Custom);
|
||||
setOperationAction(ISD::SELECT, VT, Custom);
|
||||
@ -1799,6 +1805,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
|
||||
MAKE_CASE(AArch64ISD::ADD_PRED)
|
||||
MAKE_CASE(AArch64ISD::MUL_PRED)
|
||||
MAKE_CASE(AArch64ISD::MULHS_PRED)
|
||||
MAKE_CASE(AArch64ISD::MULHU_PRED)
|
||||
MAKE_CASE(AArch64ISD::SDIV_PRED)
|
||||
MAKE_CASE(AArch64ISD::SHL_PRED)
|
||||
MAKE_CASE(AArch64ISD::SMAX_PRED)
|
||||
@ -4519,6 +4527,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
|
||||
return LowerSET_ROUNDING(Op, DAG);
|
||||
case ISD::MUL:
|
||||
return LowerMUL(Op, DAG);
|
||||
case ISD::MULHS:
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
|
||||
/*OverrideNEON=*/true);
|
||||
case ISD::MULHU:
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
|
||||
/*OverrideNEON=*/true);
|
||||
case ISD::INTRINSIC_WO_CHAIN:
|
||||
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
|
||||
case ISD::STORE:
|
||||
|
@ -89,6 +89,8 @@ enum NodeType : unsigned {
|
||||
FMUL_PRED,
|
||||
FSUB_PRED,
|
||||
MUL_PRED,
|
||||
MULHS_PRED,
|
||||
MULHU_PRED,
|
||||
SDIV_PRED,
|
||||
SHL_PRED,
|
||||
SMAX_PRED,
|
||||
|
@ -192,10 +192,12 @@ def AArch64mul_p : SDNode<"AArch64ISD::MUL_PRED", SDT_AArch64Arith>;
|
||||
def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
|
||||
def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
|
||||
def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
|
||||
def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>;
|
||||
def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>;
|
||||
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
|
||||
def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
|
||||
def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
|
||||
def AArch64umulh_p : SDNode<"AArch64ISD::MULHU_PRED", SDT_AArch64Arith>;
|
||||
|
||||
def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [
|
||||
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>,
|
||||
@ -348,6 +350,8 @@ let Predicates = [HasSVE] in {
|
||||
defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>;
|
||||
|
||||
defm MUL_ZPZZ : sve_int_bin_pred_bhsd<AArch64mul_p>;
|
||||
defm SMULH_ZPZZ : sve_int_bin_pred_bhsd<AArch64smulh_p>;
|
||||
defm UMULH_ZPZZ : sve_int_bin_pred_bhsd<AArch64umulh_p>;
|
||||
|
||||
defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ">;
|
||||
defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ">;
|
||||
@ -2385,8 +2389,8 @@ let Predicates = [HasSVE2] in {
|
||||
|
||||
// SVE2 integer multiply vectors (unpredicated)
|
||||
defm MUL_ZZZ : sve2_int_mul<0b000, "mul", null_frag, AArch64mul_p>;
|
||||
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag>;
|
||||
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag>;
|
||||
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag, AArch64smulh_p>;
|
||||
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag, AArch64umulh_p>;
|
||||
defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
|
||||
|
||||
// Add patterns for unpredicated version of smulh and umulh.
|
||||
|
144
test/CodeGen/AArch64/sve-expand-div.ll
Normal file
144
test/CodeGen/AArch64/sve-expand-div.ll
Normal file
@ -0,0 +1,144 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
|
||||
|
||||
; Check that expensive divides are expanded into a more performant sequence
|
||||
|
||||
;
|
||||
; SDIV
|
||||
;
|
||||
|
||||
define <vscale x 16 x i8> @sdiv_i8(<vscale x 16 x i8> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z1.b, #86 // =0x56
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
|
||||
; CHECK-NEXT: lsr z1.b, z0.b, #7
|
||||
; CHECK-NEXT: mov z2.b, #-1 // =0xffffffffffffffff
|
||||
; CHECK-NEXT: and z1.d, z1.d, z2.d
|
||||
; CHECK-NEXT: add z0.b, z0.b, z1.b
|
||||
; CHECK-NEXT: ret
|
||||
%div = sdiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 3, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer)
|
||||
ret <vscale x 16 x i8> %div
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @sdiv_i16(<vscale x 8 x i16> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #21846
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: mov z1.h, w8
|
||||
; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: lsr z1.h, z0.h, #15
|
||||
; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
|
||||
; CHECK-NEXT: and z1.d, z1.d, z2.d
|
||||
; CHECK-NEXT: add z0.h, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%div = sdiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 3, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer)
|
||||
ret <vscale x 8 x i16> %div
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #21846
|
||||
; CHECK-NEXT: movk w8, #21845, lsl #16
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: mov z1.s, w8
|
||||
; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: lsr z1.s, z0.s, #31
|
||||
; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
|
||||
; CHECK-NEXT: and z1.d, z1.d, z2.d
|
||||
; CHECK-NEXT: add z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%div = sdiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
|
||||
ret <vscale x 4 x i32> %div
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #6148914691236517205
|
||||
; CHECK-NEXT: movk x8, #21846
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z1.d, x8
|
||||
; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: lsr z1.d, z0.d, #63
|
||||
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
|
||||
; CHECK-NEXT: and z1.d, z1.d, z2.d
|
||||
; CHECK-NEXT: add z0.d, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%div = sdiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
|
||||
ret <vscale x 2 x i64> %div
|
||||
}
|
||||
|
||||
;
|
||||
; UDIV
|
||||
;
|
||||
|
||||
define <vscale x 16 x i8> @udiv_i8(<vscale x 16 x i8> %a) #0 {
|
||||
; CHECK-LABEL: udiv_i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z1.b, #-85 // =0xffffffffffffffab
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: mov z2.b, #1 // =0x1
|
||||
; CHECK-NEXT: umulh z1.b, p0/m, z1.b, z0.b
|
||||
; CHECK-NEXT: lsr z1.b, z1.b, #1
|
||||
; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, #3
|
||||
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
|
||||
; CHECK-NEXT: ret
|
||||
%div = udiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 3, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer)
|
||||
ret <vscale x 16 x i8> %div
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @udiv_i16(<vscale x 8 x i16> %a) #0 {
|
||||
; CHECK-LABEL: udiv_i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #-21845
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: mov z2.h, w8
|
||||
; CHECK-NEXT: mov z1.h, #1 // =0x1
|
||||
; CHECK-NEXT: umulh z2.h, p0/m, z2.h, z0.h
|
||||
; CHECK-NEXT: lsr z2.h, z2.h, #1
|
||||
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, #3
|
||||
; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h
|
||||
; CHECK-NEXT: ret
|
||||
%div = udiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 3, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer)
|
||||
ret <vscale x 8 x i16> %div
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: udiv_i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov w8, #43691
|
||||
; CHECK-NEXT: movk w8, #43690, lsl #16
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: mov z2.s, w8
|
||||
; CHECK-NEXT: mov z1.s, #3 // =0x3
|
||||
; CHECK-NEXT: umulh z2.s, p0/m, z2.s, z0.s
|
||||
; CHECK-NEXT: lsr z2.s, z2.s, #1
|
||||
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, #1
|
||||
; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
|
||||
; CHECK-NEXT: ret
|
||||
%div = udiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
|
||||
ret <vscale x 4 x i32> %div
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: udiv_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #-6148914691236517206
|
||||
; CHECK-NEXT: movk x8, #43691
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z2.d, x8
|
||||
; CHECK-NEXT: mov z1.d, #3 // =0x3
|
||||
; CHECK-NEXT: umulh z2.d, p0/m, z2.d, z0.d
|
||||
; CHECK-NEXT: lsr z2.d, z2.d, #1
|
||||
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, #1
|
||||
; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%div = udiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
|
||||
ret <vscale x 2 x i64> %div
|
||||
}
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
@ -970,7 +970,7 @@ define void @udiv_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
|
||||
|
||||
; This used to crash because isUnaryPredicate and BuildUDIV don't know how
|
||||
; a SPLAT_VECTOR of fixed vector type should be handled.
|
||||
define void @udiv_constantsplat_v8i32(<8 x i32>* %a) #0 {
|
||||
define void @udiv_constantsplat_v8i32(<8 x i32>* %a) #1 {
|
||||
; CHECK-LABEL: udiv_constantsplat_v8i32:
|
||||
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]]
|
||||
; CHECK-NEXT: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
|
||||
@ -985,3 +985,4 @@ define void @udiv_constantsplat_v8i32(<8 x i32>* %a) #0 {
|
||||
}
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
||||
attributes #1 = { "target-features"="+sve" minsize }
|
||||
|
1006
test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll
Normal file
1006
test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll
Normal file
File diff suppressed because it is too large
Load Diff
@ -776,7 +776,7 @@ define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a){
|
||||
ret <vscale x 2 x i64> %lshr
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @sdiv_const(<vscale x 4 x i32> %a) {
|
||||
define <vscale x 4 x i32> @sdiv_const(<vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: sdiv_const:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z1.s, #3 // =0x3
|
||||
@ -788,7 +788,7 @@ entry:
|
||||
ret <vscale x 4 x i32> %div
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @udiv_const(<vscale x 4 x i32> %a) {
|
||||
define <vscale x 4 x i32> @udiv_const(<vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: udiv_const:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z1.s, #3 // =0x3
|
||||
@ -799,3 +799,5 @@ entry:
|
||||
%div = udiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
|
||||
ret <vscale x 4 x i32> %div
|
||||
}
|
||||
|
||||
attributes #0 = { minsize }
|
||||
|
140
test/CodeGen/AArch64/sve-int-mulh-pred.ll
Normal file
140
test/CodeGen/AArch64/sve-int-mulh-pred.ll
Normal file
@ -0,0 +1,140 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
|
||||
|
||||
;
|
||||
; SMULH
|
||||
;
|
||||
|
||||
define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
|
||||
; CHECK-LABEL: smulh_i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 16 x i16> undef, i16 8, i64 0
|
||||
%splat = shufflevector <vscale x 16 x i16> %insert, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
|
||||
%1 = sext <vscale x 16 x i8> %a to <vscale x 16 x i16>
|
||||
%2 = sext <vscale x 16 x i8> %b to <vscale x 16 x i16>
|
||||
%mul = mul <vscale x 16 x i16> %1, %2
|
||||
%shr = lshr <vscale x 16 x i16> %mul, %splat
|
||||
%tr = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
|
||||
ret <vscale x 16 x i8> %tr
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
|
||||
; CHECK-LABEL: smulh_i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 8 x i32> undef, i32 16, i64 0
|
||||
%splat = shufflevector <vscale x 8 x i32> %insert, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
|
||||
%1 = sext <vscale x 8 x i16> %a to <vscale x 8 x i32>
|
||||
%2 = sext <vscale x 8 x i16> %b to <vscale x 8 x i32>
|
||||
%mul = mul <vscale x 8 x i32> %1, %2
|
||||
%shr = lshr <vscale x 8 x i32> %mul, %splat
|
||||
%tr = trunc <vscale x 8 x i32> %shr to <vscale x 8 x i16>
|
||||
ret <vscale x 8 x i16> %tr
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
|
||||
; CHECK-LABEL: smulh_i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 4 x i64> undef, i64 32, i64 0
|
||||
%splat = shufflevector <vscale x 4 x i64> %insert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
|
||||
%1 = sext <vscale x 4 x i32> %a to <vscale x 4 x i64>
|
||||
%2 = sext <vscale x 4 x i32> %b to <vscale x 4 x i64>
|
||||
%mul = mul <vscale x 4 x i64> %1, %2
|
||||
%shr = lshr <vscale x 4 x i64> %mul, %splat
|
||||
%tr = trunc <vscale x 4 x i64> %shr to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %tr
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
|
||||
; CHECK-LABEL: smulh_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 2 x i128> undef, i128 64, i64 0
|
||||
%splat = shufflevector <vscale x 2 x i128> %insert, <vscale x 2 x i128> undef, <vscale x 2 x i32> zeroinitializer
|
||||
%1 = sext <vscale x 2 x i64> %a to <vscale x 2 x i128>
|
||||
%2 = sext <vscale x 2 x i64> %b to <vscale x 2 x i128>
|
||||
%mul = mul <vscale x 2 x i128> %1, %2
|
||||
%shr = lshr <vscale x 2 x i128> %mul, %splat
|
||||
%tr = trunc <vscale x 2 x i128> %shr to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %tr
|
||||
}
|
||||
|
||||
;
|
||||
; UMULH
|
||||
;
|
||||
|
||||
define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
|
||||
; CHECK-LABEL: umulh_i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 16 x i16> undef, i16 8, i64 0
|
||||
%splat = shufflevector <vscale x 16 x i16> %insert, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
|
||||
%1 = zext <vscale x 16 x i8> %a to <vscale x 16 x i16>
|
||||
%2 = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
|
||||
%mul = mul <vscale x 16 x i16> %1, %2
|
||||
%shr = lshr <vscale x 16 x i16> %mul, %splat
|
||||
%tr = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
|
||||
ret <vscale x 16 x i8> %tr
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
|
||||
; CHECK-LABEL: umulh_i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 8 x i32> undef, i32 16, i64 0
|
||||
%splat = shufflevector <vscale x 8 x i32> %insert, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
|
||||
%1 = zext <vscale x 8 x i16> %a to <vscale x 8 x i32>
|
||||
%2 = zext <vscale x 8 x i16> %b to <vscale x 8 x i32>
|
||||
%mul = mul <vscale x 8 x i32> %1, %2
|
||||
%shr = lshr <vscale x 8 x i32> %mul, %splat
|
||||
%tr = trunc <vscale x 8 x i32> %shr to <vscale x 8 x i16>
|
||||
ret <vscale x 8 x i16> %tr
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
|
||||
; CHECK-LABEL: umulh_i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 4 x i64> undef, i64 32, i64 0
|
||||
%splat = shufflevector <vscale x 4 x i64> %insert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
|
||||
%1 = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
|
||||
%2 = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
|
||||
%mul = mul <vscale x 4 x i64> %1, %2
|
||||
%shr = lshr <vscale x 4 x i64> %mul, %splat
|
||||
%tr = trunc <vscale x 4 x i64> %shr to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %tr
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
|
||||
; CHECK-LABEL: umulh_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 2 x i128> undef, i128 64, i64 0
|
||||
%splat = shufflevector <vscale x 2 x i128> %insert, <vscale x 2 x i128> undef, <vscale x 2 x i32> zeroinitializer
|
||||
%1 = zext <vscale x 2 x i64> %a to <vscale x 2 x i128>
|
||||
%2 = zext <vscale x 2 x i64> %b to <vscale x 2 x i128>
|
||||
%mul = mul <vscale x 2 x i128> %1, %2
|
||||
%shr = lshr <vscale x 2 x i128> %mul, %splat
|
||||
%tr = trunc <vscale x 2 x i128> %shr to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %tr
|
||||
}
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
132
test/CodeGen/AArch64/sve2-int-mulh.ll
Normal file
132
test/CodeGen/AArch64/sve2-int-mulh.ll
Normal file
@ -0,0 +1,132 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
|
||||
|
||||
;
|
||||
; SMULH
|
||||
;
|
||||
|
||||
define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
|
||||
; CHECK-LABEL: smulh_i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: smulh z0.b, z0.b, z1.b
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 16 x i16> undef, i16 8, i64 0
|
||||
%splat = shufflevector <vscale x 16 x i16> %insert, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
|
||||
%1 = sext <vscale x 16 x i8> %a to <vscale x 16 x i16>
|
||||
%2 = sext <vscale x 16 x i8> %b to <vscale x 16 x i16>
|
||||
%mul = mul <vscale x 16 x i16> %1, %2
|
||||
%shr = lshr <vscale x 16 x i16> %mul, %splat
|
||||
%tr = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
|
||||
ret <vscale x 16 x i8> %tr
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
|
||||
; CHECK-LABEL: smulh_i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: smulh z0.h, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 8 x i32> undef, i32 16, i64 0
|
||||
%splat = shufflevector <vscale x 8 x i32> %insert, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
|
||||
%1 = sext <vscale x 8 x i16> %a to <vscale x 8 x i32>
|
||||
%2 = sext <vscale x 8 x i16> %b to <vscale x 8 x i32>
|
||||
%mul = mul <vscale x 8 x i32> %1, %2
|
||||
%shr = lshr <vscale x 8 x i32> %mul, %splat
|
||||
%tr = trunc <vscale x 8 x i32> %shr to <vscale x 8 x i16>
|
||||
ret <vscale x 8 x i16> %tr
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
|
||||
; CHECK-LABEL: smulh_i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: smulh z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 4 x i64> undef, i64 32, i64 0
|
||||
%splat = shufflevector <vscale x 4 x i64> %insert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
|
||||
%1 = sext <vscale x 4 x i32> %a to <vscale x 4 x i64>
|
||||
%2 = sext <vscale x 4 x i32> %b to <vscale x 4 x i64>
|
||||
%mul = mul <vscale x 4 x i64> %1, %2
|
||||
%shr = lshr <vscale x 4 x i64> %mul, %splat
|
||||
%tr = trunc <vscale x 4 x i64> %shr to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %tr
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
|
||||
; CHECK-LABEL: smulh_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: smulh z0.d, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 2 x i128> undef, i128 64, i64 0
|
||||
%splat = shufflevector <vscale x 2 x i128> %insert, <vscale x 2 x i128> undef, <vscale x 2 x i32> zeroinitializer
|
||||
%1 = sext <vscale x 2 x i64> %a to <vscale x 2 x i128>
|
||||
%2 = sext <vscale x 2 x i64> %b to <vscale x 2 x i128>
|
||||
%mul = mul <vscale x 2 x i128> %1, %2
|
||||
%shr = lshr <vscale x 2 x i128> %mul, %splat
|
||||
%tr = trunc <vscale x 2 x i128> %shr to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %tr
|
||||
}
|
||||
|
||||
;
|
||||
; UMULH
|
||||
;
|
||||
|
||||
define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
|
||||
; CHECK-LABEL: umulh_i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: umulh z0.b, z0.b, z1.b
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 16 x i16> undef, i16 8, i64 0
|
||||
%splat = shufflevector <vscale x 16 x i16> %insert, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
|
||||
%1 = zext <vscale x 16 x i8> %a to <vscale x 16 x i16>
|
||||
%2 = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
|
||||
%mul = mul <vscale x 16 x i16> %1, %2
|
||||
%shr = lshr <vscale x 16 x i16> %mul, %splat
|
||||
%tr = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
|
||||
ret <vscale x 16 x i8> %tr
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
|
||||
; CHECK-LABEL: umulh_i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: umulh z0.h, z0.h, z1.h
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 8 x i32> undef, i32 16, i64 0
|
||||
%splat = shufflevector <vscale x 8 x i32> %insert, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
|
||||
%1 = zext <vscale x 8 x i16> %a to <vscale x 8 x i32>
|
||||
%2 = zext <vscale x 8 x i16> %b to <vscale x 8 x i32>
|
||||
%mul = mul <vscale x 8 x i32> %1, %2
|
||||
%shr = lshr <vscale x 8 x i32> %mul, %splat
|
||||
%tr = trunc <vscale x 8 x i32> %shr to <vscale x 8 x i16>
|
||||
ret <vscale x 8 x i16> %tr
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
|
||||
; CHECK-LABEL: umulh_i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: umulh z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 4 x i64> undef, i64 32, i64 0
|
||||
%splat = shufflevector <vscale x 4 x i64> %insert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
|
||||
%1 = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
|
||||
%2 = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
|
||||
%mul = mul <vscale x 4 x i64> %1, %2
|
||||
%shr = lshr <vscale x 4 x i64> %mul, %splat
|
||||
%tr = trunc <vscale x 4 x i64> %shr to <vscale x 4 x i32>
|
||||
ret <vscale x 4 x i32> %tr
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
|
||||
; CHECK-LABEL: umulh_i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: umulh z0.d, z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%insert = insertelement <vscale x 2 x i128> undef, i128 64, i64 0
|
||||
%splat = shufflevector <vscale x 2 x i128> %insert, <vscale x 2 x i128> undef, <vscale x 2 x i32> zeroinitializer
|
||||
%1 = zext <vscale x 2 x i64> %a to <vscale x 2 x i128>
|
||||
%2 = zext <vscale x 2 x i64> %b to <vscale x 2 x i128>
|
||||
%mul = mul <vscale x 2 x i128> %1, %2
|
||||
%shr = lshr <vscale x 2 x i128> %mul, %splat
|
||||
%tr = trunc <vscale x 2 x i128> %shr to <vscale x 2 x i64>
|
||||
ret <vscale x 2 x i64> %tr
|
||||
}
|
||||
|
||||
attributes #0 = { "target-features"="+sve2" }
|
Loading…
Reference in New Issue
Block a user