1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[AArch64] Remove AArch64ISD::NEG

This NEG node is just a vector negation, which is easily represented as a SUB
from zero. Removing it from the one place it is generated is essentially
NFC (no functional change), but can allow some extra folding. The updated tests
now load different constant literals, which have already been negated.

Differential Revision: https://reviews.llvm.org/D103703
This commit is contained in:
David Green 2021-06-05 19:54:42 +01:00
parent a20eb786da
commit beaf4b0f77
6 changed files with 113 additions and 171 deletions

View File

@ -1912,7 +1912,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::BICi)
MAKE_CASE(AArch64ISD::ORRi)
MAKE_CASE(AArch64ISD::BSP)
MAKE_CASE(AArch64ISD::NEG)
MAKE_CASE(AArch64ISD::EXTR)
MAKE_CASE(AArch64ISD::ZIP1)
MAKE_CASE(AArch64ISD::ZIP2)
@ -10483,7 +10482,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
: Intrinsic::aarch64_neon_ushl;
// negate the shift amount
SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(1));
SDValue NegShiftLeft =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),

View File

@ -172,9 +172,6 @@ enum NodeType : unsigned {
// element must be identical.
BSP,
// Vector arithmetic negation
NEG,
// Vector shuffles
ZIP1,
ZIP2,

View File

@ -532,8 +532,6 @@ def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>;
def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@ -4158,14 +4156,6 @@ def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
(NOTv16i8 V128:$Vd, V128:$Vn)>;
def : Pat<(AArch64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>;
def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>;
def : Pat<(AArch64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>;
def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>;
def : Pat<(AArch64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>;
def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>;
def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>;
def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
@ -4693,8 +4683,6 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
int_aarch64_neon_usqadd>;
def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
(FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),

View File

@ -9,17 +9,16 @@ define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: adrp x8, .LCPI0_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_1]
; CHECK-NEXT: adrp x8, .LCPI0_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI0_3]
; CHECK-NEXT: adrp x8, .LCPI0_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI0_2]
; CHECK-NEXT: adrp x8, .LCPI0_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_3]
; CHECK-NEXT: adrp x8, .LCPI0_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -134,17 +133,16 @@ define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT: adrp x8, .LCPI5_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_1]
; CHECK-NEXT: adrp x8, .LCPI5_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI5_3]
; CHECK-NEXT: adrp x8, .LCPI5_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
; CHECK-NEXT: adrp x8, .LCPI5_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI5_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI5_3]
; CHECK-NEXT: adrp x8, .LCPI5_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -161,17 +159,16 @@ define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0]
; CHECK-NEXT: adrp x8, .LCPI6_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_1]
; CHECK-NEXT: adrp x8, .LCPI6_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI6_3]
; CHECK-NEXT: adrp x8, .LCPI6_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI6_2]
; CHECK-NEXT: adrp x8, .LCPI6_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_3]
; CHECK-NEXT: adrp x8, .LCPI6_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -192,17 +189,16 @@ define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0]
; CHECK-NEXT: adrp x8, .LCPI7_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_1]
; CHECK-NEXT: adrp x8, .LCPI7_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI7_3]
; CHECK-NEXT: adrp x8, .LCPI7_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI7_2]
; CHECK-NEXT: adrp x8, .LCPI7_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI7_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI7_3]
; CHECK-NEXT: adrp x8, .LCPI7_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -221,17 +217,16 @@ define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
; CHECK-NEXT: adrp x8, .LCPI8_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_1]
; CHECK-NEXT: adrp x8, .LCPI8_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI8_3]
; CHECK-NEXT: adrp x8, .LCPI8_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI8_2]
; CHECK-NEXT: adrp x8, .LCPI8_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_3]
; CHECK-NEXT: adrp x8, .LCPI8_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -250,17 +245,16 @@ define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0]
; CHECK-NEXT: adrp x8, .LCPI9_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_1]
; CHECK-NEXT: adrp x8, .LCPI9_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI9_3]
; CHECK-NEXT: adrp x8, .LCPI9_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI9_2]
; CHECK-NEXT: adrp x8, .LCPI9_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI9_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI9_3]
; CHECK-NEXT: adrp x8, .LCPI9_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -330,17 +324,16 @@ define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
; CHECK-NEXT: adrp x8, .LCPI12_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_1]
; CHECK-NEXT: adrp x8, .LCPI12_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI12_3]
; CHECK-NEXT: adrp x8, .LCPI12_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI12_2]
; CHECK-NEXT: adrp x8, .LCPI12_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_3]
; CHECK-NEXT: adrp x8, .LCPI12_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -362,17 +355,16 @@ define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI13_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_1]
; CHECK-NEXT: adrp x8, .LCPI13_2
; CHECK-NEXT: smull2 v3.2d, v0.4s, v1.4s
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI13_2]
; CHECK-NEXT: adrp x8, .LCPI13_3
; CHECK-NEXT: smull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI13_3]
; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: sshl v3.4s, v1.4s, v3.4s
; CHECK-NEXT: usra v3.4s, v1.4s, #31
; CHECK-NEXT: mls v0.4s, v3.4s, v2.4s
; CHECK-NEXT: sshl v2.4s, v1.4s, v3.4s
; CHECK-NEXT: usra v2.4s, v1.4s, #31
; CHECK-NEXT: mls v0.4s, v2.4s, v4.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -392,17 +384,16 @@ define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI14_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_1]
; CHECK-NEXT: adrp x8, .LCPI14_2
; CHECK-NEXT: smull2 v3.2d, v0.4s, v1.4s
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI14_2]
; CHECK-NEXT: adrp x8, .LCPI14_3
; CHECK-NEXT: smull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI14_3]
; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: sshl v3.4s, v1.4s, v3.4s
; CHECK-NEXT: usra v3.4s, v1.4s, #31
; CHECK-NEXT: mls v0.4s, v3.4s, v2.4s
; CHECK-NEXT: sshl v2.4s, v1.4s, v3.4s
; CHECK-NEXT: usra v2.4s, v1.4s, #31
; CHECK-NEXT: mls v0.4s, v2.4s, v4.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -422,17 +413,16 @@ define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI15_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_1]
; CHECK-NEXT: adrp x8, .LCPI15_2
; CHECK-NEXT: smull2 v3.2d, v0.4s, v1.4s
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI15_2]
; CHECK-NEXT: adrp x8, .LCPI15_3
; CHECK-NEXT: smull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI15_3]
; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: sshl v3.4s, v1.4s, v3.4s
; CHECK-NEXT: usra v3.4s, v1.4s, #31
; CHECK-NEXT: mls v0.4s, v3.4s, v2.4s
; CHECK-NEXT: sshl v2.4s, v1.4s, v3.4s
; CHECK-NEXT: usra v2.4s, v1.4s, #31
; CHECK-NEXT: mls v0.4s, v2.4s, v4.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -453,17 +443,16 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
; CHECK-NEXT: adrp x8, .LCPI16_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_1]
; CHECK-NEXT: adrp x8, .LCPI16_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI16_3]
; CHECK-NEXT: adrp x8, .LCPI16_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI16_2]
; CHECK-NEXT: adrp x8, .LCPI16_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_3]
; CHECK-NEXT: adrp x8, .LCPI16_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -482,17 +471,16 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
; CHECK-NEXT: adrp x8, .LCPI17_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_1]
; CHECK-NEXT: adrp x8, .LCPI17_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI17_3]
; CHECK-NEXT: adrp x8, .LCPI17_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI17_2]
; CHECK-NEXT: adrp x8, .LCPI17_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_3]
; CHECK-NEXT: adrp x8, .LCPI17_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -511,17 +499,16 @@ define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0]
; CHECK-NEXT: adrp x8, .LCPI18_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_1]
; CHECK-NEXT: adrp x8, .LCPI18_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_3]
; CHECK-NEXT: adrp x8, .LCPI18_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_2]
; CHECK-NEXT: adrp x8, .LCPI18_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_3]
; CHECK-NEXT: adrp x8, .LCPI18_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -591,17 +578,16 @@ define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0]
; CHECK-NEXT: adrp x8, .LCPI21_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_1]
; CHECK-NEXT: adrp x8, .LCPI21_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI21_3]
; CHECK-NEXT: adrp x8, .LCPI21_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI21_2]
; CHECK-NEXT: adrp x8, .LCPI21_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI21_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI21_3]
; CHECK-NEXT: adrp x8, .LCPI21_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -622,17 +608,16 @@ define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0]
; CHECK-NEXT: adrp x8, .LCPI22_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_1]
; CHECK-NEXT: adrp x8, .LCPI22_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI22_3]
; CHECK-NEXT: adrp x8, .LCPI22_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI22_2]
; CHECK-NEXT: adrp x8, .LCPI22_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI22_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI22_3]
; CHECK-NEXT: adrp x8, .LCPI22_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -651,17 +636,16 @@ define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
; CHECK-NEXT: adrp x8, .LCPI23_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_1]
; CHECK-NEXT: adrp x8, .LCPI23_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI23_3]
; CHECK-NEXT: adrp x8, .LCPI23_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI23_2]
; CHECK-NEXT: adrp x8, .LCPI23_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_3]
; CHECK-NEXT: adrp x8, .LCPI23_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -680,17 +664,16 @@ define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0]
; CHECK-NEXT: adrp x8, .LCPI24_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_1]
; CHECK-NEXT: adrp x8, .LCPI24_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI24_3]
; CHECK-NEXT: adrp x8, .LCPI24_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI24_2]
; CHECK-NEXT: adrp x8, .LCPI24_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI24_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI24_3]
; CHECK-NEXT: adrp x8, .LCPI24_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -710,17 +693,16 @@ define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_0]
; CHECK-NEXT: adrp x8, .LCPI25_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_1]
; CHECK-NEXT: adrp x8, .LCPI25_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI25_3]
; CHECK-NEXT: adrp x8, .LCPI25_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI25_2]
; CHECK-NEXT: adrp x8, .LCPI25_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI25_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI25_3]
; CHECK-NEXT: adrp x8, .LCPI25_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
@ -738,17 +720,16 @@ define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_0]
; CHECK-NEXT: adrp x8, .LCPI26_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_1]
; CHECK-NEXT: adrp x8, .LCPI26_3
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI26_3]
; CHECK-NEXT: adrp x8, .LCPI26_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI26_2]
; CHECK-NEXT: adrp x8, .LCPI26_3
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI26_2]
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI26_3]
; CHECK-NEXT: adrp x8, .LCPI26_4
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v3.4s, v2.4s, v3.4s
; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
; CHECK-NEXT: ushl v2.4s, v2.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b

View File

@ -70,22 +70,20 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: adrp x9, .LCPI4_1
; CHECK-NEXT: mov v0.h[1], w1
; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI4_1]
; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI4_1]
; CHECK-NEXT: adrp x8, .LCPI4_2
; CHECK-NEXT: adrp x9, .LCPI4_3
; CHECK-NEXT: mov v0.h[2], w2
; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_2]
; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ldr d1, [x9, :lo12:.LCPI4_3]
; CHECK-NEXT: mul v0.4h, v0.4h, v2.4h
; CHECK-NEXT: adrp x8, .LCPI4_4
; CHECK-NEXT: shl v2.4h, v0.4h, #1
; CHECK-NEXT: ushl v2.4h, v2.4h, v3.4h
; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_4]
; CHECK-NEXT: neg v1.4h, v1.4h
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_2]
; CHECK-NEXT: mul v0.4h, v0.4h, v3.4h
; CHECK-NEXT: adrp x8, .LCPI4_3
; CHECK-NEXT: shl v3.4h, v0.4h, #1
; CHECK-NEXT: movi d2, #0x0000000000ffff
; CHECK-NEXT: ushl v1.4h, v3.4h, v1.4h
; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_3]
; CHECK-NEXT: bic v0.4h, #248, lsl #8
; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h
; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b
; CHECK-NEXT: ushl v0.4h, v0.4h, v2.4h
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: bic v0.4h, #248, lsl #8
; CHECK-NEXT: cmhi v0.4h, v0.4h, v3.4h
; CHECK-NEXT: umov w0, v0.h[0]

View File

@ -14,7 +14,6 @@ define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI0_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -79,7 +78,6 @@ define <4 x i32> @test_urem_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI3_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -104,7 +102,6 @@ define <4 x i32> @test_urem_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI4_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -131,7 +128,6 @@ define <4 x i32> @test_urem_odd_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI5_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -156,7 +152,6 @@ define <4 x i32> @test_urem_odd_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI6_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -185,7 +180,6 @@ define <4 x i32> @test_urem_odd_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI7_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -212,7 +206,6 @@ define <4 x i32> @test_urem_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI8_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -239,7 +232,6 @@ define <4 x i32> @test_urem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI9_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -311,7 +303,6 @@ define <4 x i32> @test_urem_odd_even_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI12_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -340,7 +331,6 @@ define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI13_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -367,7 +357,6 @@ define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI14_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -394,7 +383,6 @@ define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI15_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -423,7 +411,6 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI16_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -450,7 +437,6 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI17_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -477,7 +463,6 @@ define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwi
; CHECK-NEXT: adrp x8, .LCPI18_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -525,7 +510,6 @@ define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI20_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -552,7 +536,6 @@ define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI21_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -581,7 +564,6 @@ define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI22_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -608,7 +590,6 @@ define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI23_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -635,7 +616,6 @@ define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: adrp x8, .LCPI24_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -663,7 +643,6 @@ define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nou
; CHECK-NEXT: adrp x8, .LCPI25_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
@ -689,7 +668,6 @@ define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) no
; CHECK-NEXT: adrp x8, .LCPI26_3
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b