1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[AArch64][SVE] Lower MULHU/MULHS nodes to umulh/smulh instructions

Mark MULHS/MULHU nodes as legal for both scalable and fixed SVE types,
and lower them to the appropriate SVE instructions.

Additionally, now that the MULH nodes are legal, integer divides can be
expanded into a more performant code sequence.

Differential Revision: https://reviews.llvm.org/D100487
This commit is contained in:
Bradley Smith 2021-04-13 15:19:59 +01:00
parent 7d693d1c6d
commit 935e65cc31
9 changed files with 1452 additions and 7 deletions

View File

@ -1126,6 +1126,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@ -1149,8 +1151,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::STEP_VECTOR, VT, Custom);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
}
@ -1259,6 +1259,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
@ -1453,6 +1457,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
@ -1799,6 +1805,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
MAKE_CASE(AArch64ISD::ADD_PRED)
MAKE_CASE(AArch64ISD::MUL_PRED)
MAKE_CASE(AArch64ISD::MULHS_PRED)
MAKE_CASE(AArch64ISD::MULHU_PRED)
MAKE_CASE(AArch64ISD::SDIV_PRED)
MAKE_CASE(AArch64ISD::SHL_PRED)
MAKE_CASE(AArch64ISD::SMAX_PRED)
@ -4519,6 +4527,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerSET_ROUNDING(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
case ISD::MULHS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
/*OverrideNEON=*/true);
case ISD::MULHU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
/*OverrideNEON=*/true);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::STORE:

View File

@ -89,6 +89,8 @@ enum NodeType : unsigned {
FMUL_PRED,
FSUB_PRED,
MUL_PRED,
MULHS_PRED,
MULHU_PRED,
SDIV_PRED,
SHL_PRED,
SMAX_PRED,

View File

@ -192,10 +192,12 @@ def AArch64mul_p : SDNode<"AArch64ISD::MUL_PRED", SDT_AArch64Arith>;
def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>;
def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>;
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
def AArch64umulh_p : SDNode<"AArch64ISD::MULHU_PRED", SDT_AArch64Arith>;
def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>,
@ -348,6 +350,8 @@ let Predicates = [HasSVE] in {
defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>;
defm MUL_ZPZZ : sve_int_bin_pred_bhsd<AArch64mul_p>;
defm SMULH_ZPZZ : sve_int_bin_pred_bhsd<AArch64smulh_p>;
defm UMULH_ZPZZ : sve_int_bin_pred_bhsd<AArch64umulh_p>;
defm SDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b100, "sdiv", "SDIV_ZPZZ", int_aarch64_sve_sdiv, DestructiveBinaryCommWithRev, "SDIVR_ZPmZ">;
defm UDIV_ZPmZ : sve_int_bin_pred_arit_2_div<0b101, "udiv", "UDIV_ZPZZ", int_aarch64_sve_udiv, DestructiveBinaryCommWithRev, "UDIVR_ZPmZ">;
@ -2385,8 +2389,8 @@ let Predicates = [HasSVE2] in {
// SVE2 integer multiply vectors (unpredicated)
defm MUL_ZZZ : sve2_int_mul<0b000, "mul", null_frag, AArch64mul_p>;
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag>;
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag>;
defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag, AArch64smulh_p>;
defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag, AArch64umulh_p>;
defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>;
// Add patterns for unpredicated version of smulh and umulh.

View File

@ -0,0 +1,144 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
; Check that expensive divides are expanded into a more performant sequence
;
; SDIV
;
; Signed divide of every i8 lane by a splat of 3. The expected code multiplies
; by the constant 86 with smulh and then adds the sign bit of that product
; (lsr #7) instead of emitting a divide.
define <vscale x 16 x i8> @sdiv_i8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: sdiv_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, #86 // =0x56
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: lsr z1.b, z0.b, #7
; CHECK-NEXT: mov z2.b, #-1 // =0xffffffffffffffff
; CHECK-NEXT: and z1.d, z1.d, z2.d
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%div = sdiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 3, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer)
ret <vscale x 16 x i8> %div
}
; Signed divide of every i16 lane by a splat of 3. The expected code multiplies
; by the constant 21846 with smulh and then adds the sign bit of that product
; (lsr #15) instead of emitting a divide.
define <vscale x 8 x i16> @sdiv_i16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: sdiv_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #21846
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: lsr z1.h, z0.h, #15
; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT: and z1.d, z1.d, z2.d
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%div = sdiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 3, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer)
ret <vscale x 8 x i16> %div
}
; Signed divide of every i32 lane by a splat of 3. The expected code builds a
; multiplier in w8 (mov + movk), applies it with smulh and then adds the sign
; bit of that product (lsr #31) instead of emitting a divide.
define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sdiv_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #21846
; CHECK-NEXT: movk w8, #21845, lsl #16
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: lsr z1.s, z0.s, #31
; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: and z1.d, z1.d, z2.d
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%div = sdiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
ret <vscale x 4 x i32> %div
}
; Signed divide of every i64 lane by a splat of 3. The expected code builds a
; multiplier in x8 (mov + movk), applies it with smulh and then adds the sign
; bit of that product (lsr #63) instead of emitting a divide.
define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: sdiv_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #6148914691236517205
; CHECK-NEXT: movk x8, #21846
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: lsr z1.d, z0.d, #63
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
; CHECK-NEXT: and z1.d, z1.d, z2.d
; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%div = sdiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
ret <vscale x 2 x i64> %div
}
;
; UDIV
;
; Unsigned divide of every i8 lane by a splat of 3. The expected code applies
; umulh with the constant -85 (0xab), shifts right by 1, and then a cmpeq/sel
; pair chooses between the original input and that quotient.
; NOTE(review): the compare here is splat(1) against #3, while udiv_i32 and
; udiv_i64 compare splat(3) against #1 -- presumably equivalent (both are
; constant-false); confirm the operand order is intended.
define <vscale x 16 x i8> @udiv_i8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: udiv_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z1.b, #-85 // =0xffffffffffffffab
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, #1 // =0x1
; CHECK-NEXT: umulh z1.b, p0/m, z1.b, z0.b
; CHECK-NEXT: lsr z1.b, z1.b, #1
; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, #3
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
%div = udiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 3, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer)
ret <vscale x 16 x i8> %div
}
; Unsigned divide of every i16 lane by a splat of 3. The expected code applies
; umulh with the constant -21845 (materialized in w8), shifts right by 1, and
; then a cmpeq/sel pair chooses between the original input and that quotient.
; NOTE(review): the compare here is splat(1) against #3, while udiv_i32 and
; udiv_i64 compare splat(3) against #1 -- presumably equivalent (both are
; constant-false); confirm the operand order is intended.
define <vscale x 8 x i16> @udiv_i16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: udiv_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-21845
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w8
; CHECK-NEXT: mov z1.h, #1 // =0x1
; CHECK-NEXT: umulh z2.h, p0/m, z2.h, z0.h
; CHECK-NEXT: lsr z2.h, z2.h, #1
; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, #3
; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h
; CHECK-NEXT: ret
%div = udiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 3, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer)
ret <vscale x 8 x i16> %div
}
; Unsigned divide of every i32 lane by a splat of 3. The expected code builds a
; multiplier in w8 (mov + movk), applies it with umulh, shifts right by 1, and
; then a cmpeq/sel pair chooses between the original input and that quotient.
define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: udiv_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w8
; CHECK-NEXT: mov z1.s, #3 // =0x3
; CHECK-NEXT: umulh z2.s, p0/m, z2.s, z0.s
; CHECK-NEXT: lsr z2.s, z2.s, #1
; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, #1
; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
; CHECK-NEXT: ret
%div = udiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
ret <vscale x 4 x i32> %div
}
; Unsigned divide of every i64 lane by a splat of 3. The expected code builds a
; multiplier in x8 (mov + movk), applies it with umulh, shifts right by 1, and
; then a cmpeq/sel pair chooses between the original input and that quotient.
define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: udiv_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-6148914691236517206
; CHECK-NEXT: movk x8, #43691
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z2.d, x8
; CHECK-NEXT: mov z1.d, #3 // =0x3
; CHECK-NEXT: umulh z2.d, p0/m, z2.d, z0.d
; CHECK-NEXT: lsr z2.d, z2.d, #1
; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, #1
; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d
; CHECK-NEXT: ret
%div = udiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
ret <vscale x 2 x i64> %div
}
attributes #0 = { "target-features"="+sve" }

View File

@ -970,7 +970,7 @@ define void @udiv_v32i64(<32 x i64>* %a, <32 x i64>* %b) #0 {
; This used to crash because isUnaryPredicate and BuildUDIV didn't know how
; to handle a SPLAT_VECTOR of fixed vector type.
define void @udiv_constantsplat_v8i32(<8 x i32>* %a) #0 {
define void @udiv_constantsplat_v8i32(<8 x i32>* %a) #1 {
; CHECK-LABEL: udiv_constantsplat_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]]
; CHECK-NEXT: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
@ -985,3 +985,4 @@ define void @udiv_constantsplat_v8i32(<8 x i32>* %a) #0 {
}
attributes #0 = { "target-features"="+sve" }
attributes #1 = { "target-features"="+sve" minsize }

File diff suppressed because it is too large Load Diff

View File

@ -776,7 +776,7 @@ define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a){
ret <vscale x 2 x i64> %lshr
}
define <vscale x 4 x i32> @sdiv_const(<vscale x 4 x i32> %a) {
define <vscale x 4 x i32> @sdiv_const(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sdiv_const:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.s, #3 // =0x3
@ -788,7 +788,7 @@ entry:
ret <vscale x 4 x i32> %div
}
define <vscale x 4 x i32> @udiv_const(<vscale x 4 x i32> %a) {
define <vscale x 4 x i32> @udiv_const(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: udiv_const:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z1.s, #3 // =0x3
@ -799,3 +799,5 @@ entry:
%div = udiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
ret <vscale x 4 x i32> %div
}
attributes #0 = { minsize }

View File

@ -0,0 +1,140 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
;
; SMULH
;
; Signed high-half multiply written out in IR: sign-extend both i8 operands to
; i16, multiply, shift right by 8 and truncate back to i8. With only +sve
; enabled the expected code is one smulh governed by a ptrue predicate.
define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: smulh_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
%insert = insertelement <vscale x 16 x i16> undef, i16 8, i64 0
%splat = shufflevector <vscale x 16 x i16> %insert, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
%1 = sext <vscale x 16 x i8> %a to <vscale x 16 x i16>
%2 = sext <vscale x 16 x i8> %b to <vscale x 16 x i16>
%mul = mul <vscale x 16 x i16> %1, %2
%shr = lshr <vscale x 16 x i16> %mul, %splat
%tr = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
ret <vscale x 16 x i8> %tr
}
; Signed high-half multiply written out in IR: sign-extend both i16 operands to
; i32, multiply, shift right by 16 and truncate back to i16. With only +sve
; enabled the expected code is one smulh governed by a ptrue predicate.
define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: smulh_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%insert = insertelement <vscale x 8 x i32> undef, i32 16, i64 0
%splat = shufflevector <vscale x 8 x i32> %insert, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
%1 = sext <vscale x 8 x i16> %a to <vscale x 8 x i32>
%2 = sext <vscale x 8 x i16> %b to <vscale x 8 x i32>
%mul = mul <vscale x 8 x i32> %1, %2
%shr = lshr <vscale x 8 x i32> %mul, %splat
%tr = trunc <vscale x 8 x i32> %shr to <vscale x 8 x i16>
ret <vscale x 8 x i16> %tr
}
; Signed high-half multiply written out in IR: sign-extend both i32 operands to
; i64, multiply, shift right by 32 and truncate back to i32. With only +sve
; enabled the expected code is one smulh governed by a ptrue predicate.
define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: smulh_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%insert = insertelement <vscale x 4 x i64> undef, i64 32, i64 0
%splat = shufflevector <vscale x 4 x i64> %insert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
%1 = sext <vscale x 4 x i32> %a to <vscale x 4 x i64>
%2 = sext <vscale x 4 x i32> %b to <vscale x 4 x i64>
%mul = mul <vscale x 4 x i64> %1, %2
%shr = lshr <vscale x 4 x i64> %mul, %splat
%tr = trunc <vscale x 4 x i64> %shr to <vscale x 4 x i32>
ret <vscale x 4 x i32> %tr
}
; Signed high-half multiply written out in IR: sign-extend both i64 operands to
; i128, multiply, shift right by 64 and truncate back to i64. With only +sve
; enabled the expected code is one smulh governed by a ptrue predicate.
define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: smulh_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%insert = insertelement <vscale x 2 x i128> undef, i128 64, i64 0
%splat = shufflevector <vscale x 2 x i128> %insert, <vscale x 2 x i128> undef, <vscale x 2 x i32> zeroinitializer
%1 = sext <vscale x 2 x i64> %a to <vscale x 2 x i128>
%2 = sext <vscale x 2 x i64> %b to <vscale x 2 x i128>
%mul = mul <vscale x 2 x i128> %1, %2
%shr = lshr <vscale x 2 x i128> %mul, %splat
%tr = trunc <vscale x 2 x i128> %shr to <vscale x 2 x i64>
ret <vscale x 2 x i64> %tr
}
;
; UMULH
;
; Unsigned high-half multiply written out in IR: zero-extend both i8 operands
; to i16, multiply, shift right by 8 and truncate back to i8. With only +sve
; enabled the expected code is one umulh governed by a ptrue predicate.
define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: umulh_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
%insert = insertelement <vscale x 16 x i16> undef, i16 8, i64 0
%splat = shufflevector <vscale x 16 x i16> %insert, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
%1 = zext <vscale x 16 x i8> %a to <vscale x 16 x i16>
%2 = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
%mul = mul <vscale x 16 x i16> %1, %2
%shr = lshr <vscale x 16 x i16> %mul, %splat
%tr = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
ret <vscale x 16 x i8> %tr
}
; Unsigned high-half multiply written out in IR: zero-extend both i16 operands
; to i32, multiply, shift right by 16 and truncate back to i16. With only +sve
; enabled the expected code is one umulh governed by a ptrue predicate.
define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: umulh_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%insert = insertelement <vscale x 8 x i32> undef, i32 16, i64 0
%splat = shufflevector <vscale x 8 x i32> %insert, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
%1 = zext <vscale x 8 x i16> %a to <vscale x 8 x i32>
%2 = zext <vscale x 8 x i16> %b to <vscale x 8 x i32>
%mul = mul <vscale x 8 x i32> %1, %2
%shr = lshr <vscale x 8 x i32> %mul, %splat
%tr = trunc <vscale x 8 x i32> %shr to <vscale x 8 x i16>
ret <vscale x 8 x i16> %tr
}
; Unsigned high-half multiply written out in IR: zero-extend both i32 operands
; to i64, multiply, shift right by 32 and truncate back to i32. With only +sve
; enabled the expected code is one umulh governed by a ptrue predicate.
define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: umulh_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%insert = insertelement <vscale x 4 x i64> undef, i64 32, i64 0
%splat = shufflevector <vscale x 4 x i64> %insert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
%1 = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
%2 = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
%mul = mul <vscale x 4 x i64> %1, %2
%shr = lshr <vscale x 4 x i64> %mul, %splat
%tr = trunc <vscale x 4 x i64> %shr to <vscale x 4 x i32>
ret <vscale x 4 x i32> %tr
}
; Unsigned high-half multiply written out in IR: zero-extend both i64 operands
; to i128, multiply, shift right by 64 and truncate back to i64. With only +sve
; enabled the expected code is one umulh governed by a ptrue predicate.
define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: umulh_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%insert = insertelement <vscale x 2 x i128> undef, i128 64, i64 0
%splat = shufflevector <vscale x 2 x i128> %insert, <vscale x 2 x i128> undef, <vscale x 2 x i32> zeroinitializer
%1 = zext <vscale x 2 x i64> %a to <vscale x 2 x i128>
%2 = zext <vscale x 2 x i64> %b to <vscale x 2 x i128>
%mul = mul <vscale x 2 x i128> %1, %2
%shr = lshr <vscale x 2 x i128> %mul, %splat
%tr = trunc <vscale x 2 x i128> %shr to <vscale x 2 x i64>
ret <vscale x 2 x i64> %tr
}
attributes #0 = { "target-features"="+sve" }

View File

@ -0,0 +1,132 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
;
; SMULH
;
; Signed high-half multiply written out in IR: sign-extend both i8 operands to
; i16, multiply, shift right by 8 and truncate back to i8. With +sve2 enabled
; the expected code is a single smulh with no governing predicate.
define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: smulh_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: smulh z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%insert = insertelement <vscale x 16 x i16> undef, i16 8, i64 0
%splat = shufflevector <vscale x 16 x i16> %insert, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
%1 = sext <vscale x 16 x i8> %a to <vscale x 16 x i16>
%2 = sext <vscale x 16 x i8> %b to <vscale x 16 x i16>
%mul = mul <vscale x 16 x i16> %1, %2
%shr = lshr <vscale x 16 x i16> %mul, %splat
%tr = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
ret <vscale x 16 x i8> %tr
}
; Signed high-half multiply written out in IR: sign-extend both i16 operands to
; i32, multiply, shift right by 16 and truncate back to i16. With +sve2 enabled
; the expected code is a single smulh with no governing predicate.
define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: smulh_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: smulh z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%insert = insertelement <vscale x 8 x i32> undef, i32 16, i64 0
%splat = shufflevector <vscale x 8 x i32> %insert, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
%1 = sext <vscale x 8 x i16> %a to <vscale x 8 x i32>
%2 = sext <vscale x 8 x i16> %b to <vscale x 8 x i32>
%mul = mul <vscale x 8 x i32> %1, %2
%shr = lshr <vscale x 8 x i32> %mul, %splat
%tr = trunc <vscale x 8 x i32> %shr to <vscale x 8 x i16>
ret <vscale x 8 x i16> %tr
}
; Signed high-half multiply written out in IR: sign-extend both i32 operands to
; i64, multiply, shift right by 32 and truncate back to i32. With +sve2 enabled
; the expected code is a single smulh with no governing predicate.
define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: smulh_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: smulh z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%insert = insertelement <vscale x 4 x i64> undef, i64 32, i64 0
%splat = shufflevector <vscale x 4 x i64> %insert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
%1 = sext <vscale x 4 x i32> %a to <vscale x 4 x i64>
%2 = sext <vscale x 4 x i32> %b to <vscale x 4 x i64>
%mul = mul <vscale x 4 x i64> %1, %2
%shr = lshr <vscale x 4 x i64> %mul, %splat
%tr = trunc <vscale x 4 x i64> %shr to <vscale x 4 x i32>
ret <vscale x 4 x i32> %tr
}
; Signed high-half multiply written out in IR: sign-extend both i64 operands to
; i128, multiply, shift right by 64 and truncate back to i64. With +sve2
; enabled the expected code is a single smulh with no governing predicate.
define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: smulh_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: smulh z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%insert = insertelement <vscale x 2 x i128> undef, i128 64, i64 0
%splat = shufflevector <vscale x 2 x i128> %insert, <vscale x 2 x i128> undef, <vscale x 2 x i32> zeroinitializer
%1 = sext <vscale x 2 x i64> %a to <vscale x 2 x i128>
%2 = sext <vscale x 2 x i64> %b to <vscale x 2 x i128>
%mul = mul <vscale x 2 x i128> %1, %2
%shr = lshr <vscale x 2 x i128> %mul, %splat
%tr = trunc <vscale x 2 x i128> %shr to <vscale x 2 x i64>
ret <vscale x 2 x i64> %tr
}
;
; UMULH
;
; Unsigned high-half multiply written out in IR: zero-extend both i8 operands
; to i16, multiply, shift right by 8 and truncate back to i8. With +sve2
; enabled the expected code is a single umulh with no governing predicate.
define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: umulh_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: umulh z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%insert = insertelement <vscale x 16 x i16> undef, i16 8, i64 0
%splat = shufflevector <vscale x 16 x i16> %insert, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
%1 = zext <vscale x 16 x i8> %a to <vscale x 16 x i16>
%2 = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
%mul = mul <vscale x 16 x i16> %1, %2
%shr = lshr <vscale x 16 x i16> %mul, %splat
%tr = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
ret <vscale x 16 x i8> %tr
}
; Unsigned high-half multiply written out in IR: zero-extend both i16 operands
; to i32, multiply, shift right by 16 and truncate back to i16. With +sve2
; enabled the expected code is a single umulh with no governing predicate.
define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: umulh_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: umulh z0.h, z0.h, z1.h
; CHECK-NEXT: ret
%insert = insertelement <vscale x 8 x i32> undef, i32 16, i64 0
%splat = shufflevector <vscale x 8 x i32> %insert, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
%1 = zext <vscale x 8 x i16> %a to <vscale x 8 x i32>
%2 = zext <vscale x 8 x i16> %b to <vscale x 8 x i32>
%mul = mul <vscale x 8 x i32> %1, %2
%shr = lshr <vscale x 8 x i32> %mul, %splat
%tr = trunc <vscale x 8 x i32> %shr to <vscale x 8 x i16>
ret <vscale x 8 x i16> %tr
}
; Unsigned high-half multiply written out in IR: zero-extend both i32 operands
; to i64, multiply, shift right by 32 and truncate back to i32. With +sve2
; enabled the expected code is a single umulh with no governing predicate.
define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: umulh_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: umulh z0.s, z0.s, z1.s
; CHECK-NEXT: ret
%insert = insertelement <vscale x 4 x i64> undef, i64 32, i64 0
%splat = shufflevector <vscale x 4 x i64> %insert, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
%1 = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
%2 = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
%mul = mul <vscale x 4 x i64> %1, %2
%shr = lshr <vscale x 4 x i64> %mul, %splat
%tr = trunc <vscale x 4 x i64> %shr to <vscale x 4 x i32>
ret <vscale x 4 x i32> %tr
}
; Unsigned high-half multiply written out in IR: zero-extend both i64 operands
; to i128, multiply, shift right by 64 and truncate back to i64. With +sve2
; enabled the expected code is a single umulh with no governing predicate.
define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: umulh_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: umulh z0.d, z0.d, z1.d
; CHECK-NEXT: ret
%insert = insertelement <vscale x 2 x i128> undef, i128 64, i64 0
%splat = shufflevector <vscale x 2 x i128> %insert, <vscale x 2 x i128> undef, <vscale x 2 x i32> zeroinitializer
%1 = zext <vscale x 2 x i64> %a to <vscale x 2 x i128>
%2 = zext <vscale x 2 x i64> %b to <vscale x 2 x i128>
%mul = mul <vscale x 2 x i128> %1, %2
%shr = lshr <vscale x 2 x i128> %mul, %splat
%tr = trunc <vscale x 2 x i128> %shr to <vscale x 2 x i64>
ret <vscale x 2 x i64> %tr
}
attributes #0 = { "target-features"="+sve2" }