
AArch64: fix big-endian immediate materialisation

We were materialising big-endian constants using DAG nodes with types different
from what was requested, followed by a bitcast. This is fine on little-endian
machines where bitcasting is a nop, but we need a slightly different
representation for big-endian. This adds a new set of NVCAST (natural-vector
cast) operations which are always nops.
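
To see why the bitcast is only free on little-endian, consider reinterpreting a v4i32 splat of 1 as v8i16. A minimal host-side sketch in plain C++ (an illustration of byte order, not LLVM code):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint32_t v4i32[4] = {1, 1, 1, 1}; // the materialised splat constant
  uint16_t v8i16[8];
  // A bitcast is defined by memory layout: same bytes, new lane type.
  std::memcpy(v8i16, v4i32, sizeof(v4i32));
  // Little-endian hosts print "1 0 1 0 1 0 1 0"; big-endian hosts print
  // "0 1 0 1 0 1 0 1". To keep these semantics for an in-register bitcast,
  // a big-endian backend has to emit REV instructions; NVCAST instead
  // just relabels the lanes of the register.
  for (int i = 0; i < 8; ++i)
    std::printf("%u ", (unsigned)v8i16[i]);
  std::printf("\n");
}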

Patch by Asiri Rathnayake.

llvm-svn: 217138
Tim Northover 2014-09-04 09:46:14 +00:00
parent 721141b1a5
commit 54d4e0e00b
4 changed files with 716 additions and 21 deletions


@@ -5466,13 +5466,13 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (VT.getSizeInBits() == 128) {
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
DAG.getConstant(CnstVal, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
// Support the V64 version via subregister insertion.
SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
DAG.getConstant(CnstVal, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
@@ -5481,7 +5481,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
@@ -5490,7 +5490,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
@@ -5499,7 +5499,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(16, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
@@ -5508,7 +5508,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(24, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
@@ -5517,7 +5517,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
@@ -5526,7 +5526,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
@@ -5535,7 +5535,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(264, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
@@ -5544,7 +5544,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(272, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) {
@@ -5552,7 +5552,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
// The few faces of FMOV...
@@ -5561,7 +5561,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) &&
@@ -5569,7 +5569,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
DAG.getConstant(CnstVal, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
// The many faces of MVNI...
@@ -5580,7 +5580,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
@@ -5589,7 +5589,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
@@ -5598,7 +5598,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(16, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
@@ -5607,7 +5607,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(24, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
@@ -5616,7 +5616,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(0, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
@@ -5625,7 +5625,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(8, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
@@ -5634,7 +5634,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(264, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
@@ -5643,7 +5643,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
DAG.getConstant(CnstVal, MVT::i32),
DAG.getConstant(272, MVT::i32));
-return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}


@@ -162,6 +162,13 @@ enum {
SITOF,
UITOF,
/// Natural vector cast. ISD::BITCAST is not natural in the big-endian
/// world w.r.t. vectors, which causes additional REV instructions to be
/// generated to compensate for the byte-swapping. But sometimes we do
/// need to re-interpret the data in SIMD vector registers in big-endian
/// mode without emitting such REV instructions.
NVCAST,
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,


@@ -237,6 +237,7 @@ def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
//===----------------------------------------------------------------------===//
@@ -4957,6 +4958,59 @@ def : Pat<(trap), (BRK 1)>;
// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
//
// Natural vector casts (64 bit)
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
// Natural vector casts (128 bit)
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
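// Selection note: each NvCast pattern above rewrites the node to its
// unmodified source operand, so an NVCAST costs zero instructions; it only
// retypes the FPR64/FPR128 value. bitconvert, by contrast, selects
// endian-dependently (the IsLE patterns below; REV-based IsBE counterparts
// live elsewhere in this file).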
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;


@@ -0,0 +1,634 @@
; RUN: llc -mtriple=aarch64_be--linux-gnu < %s | FileCheck %s
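; The constants below cover each AdvSIMD modified-immediate encoding
; (MOVI/MVNI/FMOV, immediate types 1-12). The first group of tests checks
; that a constant materialised at a different type from its user feeds the
; user directly, with no byte-swapping rev between the movi/mvni/fmov and
; the add; the *_call groups pin down the rev64/ext sequences expected when
; such constants cross a big-endian call boundary.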
@vec_v8i16 = global <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
; CHECK-LABEL: movi_modimm_t1:
define i16 @movi_modimm_t1() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: movi_modimm_t2:
define i16 @movi_modimm_t2() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: movi_modimm_t3:
define i16 @movi_modimm_t3() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #16
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: movi_modimm_t4:
define i16 @movi_modimm_t4() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #24
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: movi_modimm_t5:
define i16 @movi_modimm_t5() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #0x1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: movi_modimm_t6:
define i16 @movi_modimm_t6() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #0x1, lsl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: movi_modimm_t7:
define i16 @movi_modimm_t7() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, msl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 511, i16 0, i16 511, i16 0, i16 511, i16 0, i16 511, i16 0>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: movi_modimm_t8:
define i16 @movi_modimm_t8() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, msl #16
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65535, i16 1, i16 65535, i16 1, i16 65535, i16 1, i16 65535, i16 1>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: movi_modimm_t9:
define i16 @movi_modimm_t9() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: movi v[[REG2:[0-9]+]].16b, #0x1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: movi_modimm_t10:
define i16 @movi_modimm_t10() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: movi v[[REG2:[0-9]+]].2d, #0x00ffff0000ffff
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: fmov_modimm_t11:
define i16 @fmov_modimm_t11() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: fmov v[[REG2:[0-9]+]].4s, #3.00000000
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: fmov_modimm_t12:
define i16 @fmov_modimm_t12() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: fmov v[[REG2:[0-9]+]].2d, #0.17968750
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 0, i16 0, i16 0, i16 16327, i16 0, i16 0, i16 0, i16 16327>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: mvni_modimm_t1:
define i16 @mvni_modimm_t1() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: mvni_modimm_t2:
define i16 @mvni_modimm_t2() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: mvni_modimm_t3:
define i16 @mvni_modimm_t3() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #16
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: mvni_modimm_t4:
define i16 @mvni_modimm_t4() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #24
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: mvni_modimm_t5:
define i16 @mvni_modimm_t5() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #0x1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: mvni_modimm_t6:
define i16 @mvni_modimm_t6() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #0x1, lsl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: mvni_modimm_t7:
define i16 @mvni_modimm_t7() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, msl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65024, i16 65535, i16 65024, i16 65535, i16 65024, i16 65535, i16 65024, i16 65535>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
; CHECK-LABEL: mvni_modimm_t8:
define i16 @mvni_modimm_t8() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, msl #16
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
%in = load <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 0, i16 65534, i16 0, i16 65534, i16 0, i16 65534, i16 0, i16 65534>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
}
declare i8 @f_v8i8(<8 x i8> %arg)
declare i16 @f_v4i16(<4 x i16> %arg)
declare i32 @f_v2i32(<2 x i32> %arg)
declare i8 @f_v16i8(<16 x i8> %arg)
declare i16 @f_v8i16(<8 x i16> %arg)
declare i32 @f_v4i32(<4 x i32> %arg)
; CHECK-LABEL: movi_modimm_t1_call:
define void @movi_modimm_t1_call() {
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 8, i8 0, i8 0, i8 0, i8 8, i8 0, i8 0, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 7, i16 0, i16 7, i16 0>)
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 6, i32 6>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 4, i16 0, i16 4, i16 0, i16 4, i16 0, i16 4, i16 0>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 3, i32 3, i32 3, i32 3>)
ret void
}
; CHECK-LABEL: movi_modimm_t2_call:
define void @movi_modimm_t2_call() {
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, lsl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 0, i8 8, i8 0, i8 0, i8 0, i8 8, i8 0, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, lsl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 1792, i16 0, i16 1792, i16 0>)
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, lsl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 1536, i32 1536>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, lsl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 1024, i16 0, i16 1024, i16 0, i16 1024, i16 0, i16 1024, i16 0>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, lsl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 768, i32 768, i32 768, i32 768>)
ret void
}
; CHECK-LABEL: movi_modimm_t3_call:
define void @movi_modimm_t3_call() {
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, lsl #16
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 0, i8 0, i8 8, i8 0, i8 0, i8 0, i8 8, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, lsl #16
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 0, i16 7, i16 0, i16 7>)
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, lsl #16
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 393216, i32 393216>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #16
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, lsl #16
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 0, i16 4, i16 0, i16 4, i16 0, i16 4, i16 0, i16 4>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, lsl #16
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 196608, i32 196608, i32 196608, i32 196608>)
ret void
}
; CHECK-LABEL: movi_modimm_t4_call:
define void @movi_modimm_t4_call() {
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, lsl #24
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 0, i8 0, i8 0, i8 8, i8 0, i8 0, i8 0, i8 8>)
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, lsl #24
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 0, i16 1792, i16 0, i16 1792>)
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, lsl #24
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 100663296, i32 100663296>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, lsl #24
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, lsl #24
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 0, i16 1024, i16 0, i16 1024, i16 0, i16 1024, i16 0, i16 1024>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, lsl #24
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 50331648, i32 50331648, i32 50331648, i32 50331648>)
ret void
}
; CHECK-LABEL: movi_modimm_t5_call:
define void @movi_modimm_t5_call() {
; CHECK: movi v[[REG1:[0-9]+]].4h, #0x8
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 8, i8 0, i8 8, i8 0, i8 8, i8 0, i8 8, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].4h, #0x7
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 7, i16 7, i16 7, i16 7>)
; CHECK: movi v[[REG1:[0-9]+]].4h, #0x6
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 393222, i32 393222>)
; CHECK: movi v[[REG1:[0-9]+]].8h, #0x5
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].8h, #0x4
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>)
; CHECK: movi v[[REG1:[0-9]+]].8h, #0x3
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 196611, i32 196611, i32 196611, i32 196611>)
ret void
}
; CHECK-LABEL: movi_modimm_t6_call:
define void @movi_modimm_t6_call() {
; CHECK: movi v[[REG1:[0-9]+]].4h, #0x8, lsl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 0, i8 8, i8 0, i8 8, i8 0, i8 8, i8 0, i8 8>)
; CHECK: movi v[[REG1:[0-9]+]].4h, #0x7, lsl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 1792, i16 1792, i16 1792, i16 1792>)
; CHECK: movi v[[REG1:[0-9]+]].4h, #0x6, lsl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 100664832, i32 100664832>)
; CHECK: movi v[[REG1:[0-9]+]].8h, #0x5, lsl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5>)
; CHECK: movi v[[REG1:[0-9]+]].8h, #0x4, lsl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024>)
; CHECK: movi v[[REG1:[0-9]+]].8h, #0x3, lsl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 50332416, i32 50332416, i32 50332416, i32 50332416>)
ret void
}
; CHECK-LABEL: movi_modimm_t7_call:
define void @movi_modimm_t7_call() {
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, msl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 255, i8 8, i8 0, i8 0, i8 255, i8 8, i8 0, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, msl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 2047, i16 0, i16 2047, i16 0>)
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, msl #8
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 1791, i32 1791>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, msl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 255, i8 5, i8 0, i8 0, i8 255, i8 5, i8 0, i8 0, i8 255, i8 5, i8 0, i8 0, i8 255, i8 5, i8 0, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, msl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 1279, i16 0, i16 1279, i16 0, i16 1279, i16 0, i16 1279, i16 0>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, msl #8
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 1023, i32 1023, i32 1023, i32 1023>)
ret void
}
; CHECK-LABEL: movi_modimm_t8_call:
define void @movi_modimm_t8_call() {
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x8, msl #16
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 255, i8 255, i8 8, i8 0, i8 255, i8 255, i8 8, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x7, msl #16
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 65535, i16 7, i16 65535, i16 7>)
; CHECK: movi v[[REG1:[0-9]+]].2s, #0x6, msl #16
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 458751, i32 458751>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x5, msl #16
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 255, i8 255, i8 5, i8 0, i8 255, i8 255, i8 5, i8 0, i8 255, i8 255, i8 5, i8 0, i8 255, i8 255, i8 5, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x4, msl #16
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 65535, i16 4, i16 65535, i16 4, i16 65535, i16 4, i16 65535, i16 4>)
; CHECK: movi v[[REG1:[0-9]+]].4s, #0x3, msl #16
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 262143, i32 262143, i32 262143, i32 262143>)
ret void
}
; CHECK-LABEL: movi_modimm_t9_call:
define void @movi_modimm_t9_call() {
; CHECK: movi v[[REG1:[0-9]+]].8b, #0x8
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
; CHECK: movi v[[REG1:[0-9]+]].8b, #0x7
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 1799, i16 1799, i16 1799, i16 1799>)
; CHECK: movi v[[REG1:[0-9]+]].8b, #0x6
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 101058054, i32 101058054>)
; CHECK: movi v[[REG1:[0-9]+]].16b, #0x5
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>)
; CHECK: movi v[[REG1:[0-9]+]].16b, #0x4
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028>)
; CHECK: movi v[[REG1:[0-9]+]].16b, #0x3
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 50529027, i32 50529027, i32 50529027, i32 50529027>)
ret void
}
; CHECK-LABEL: movi_modimm_t10_call:
define void @movi_modimm_t10_call() {
; CHECK: movi d[[REG1:[0-9]+]], #0x0000ff000000ff
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0>)
; CHECK: movi d[[REG1:[0-9]+]], #0x00ffff0000ffff
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 -1, i16 0, i16 -1, i16 0>)
; CHECK: movi d[[REG1:[0-9]+]], #0xffffffffffffffff
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 -1, i32 -1>)
; CHECK: movi v[[REG1:[0-9]+]].2d, #0xffffff00ffffff
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0>)
; CHECK: movi v[[REG1:[0-9]+]].2d, #0xffffffffffff0000
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 0, i16 -1, i16 -1, i16 -1, i16 0, i16 -1, i16 -1, i16 -1>)
; CHECK: movi v[[REG1:[0-9]+]].2d, #0xffffffff00000000
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 0, i32 -1, i32 0, i32 -1>)
ret void
}
; CHECK-LABEL: fmov_modimm_t11_call:
define void @fmov_modimm_t11_call() {
; CHECK: fmov v[[REG1:[0-9]+]].2s, #4.00000000
; CHECK-NEXT: rev64 v{{[0-9]+}}.8b, v[[REG1]].8b
; CHECK-NEXT: bl f_v8i8
call i8 @f_v8i8(<8 x i8> <i8 0, i8 0, i8 128, i8 64, i8 0, i8 0, i8 128, i8 64>)
; CHECK: fmov v[[REG1:[0-9]+]].2s, #3.75000000
; CHECK-NEXT: rev64 v{{[0-9]+}}.4h, v[[REG1]].4h
; CHECK-NEXT: bl f_v4i16
call i16 @f_v4i16(<4 x i16> <i16 0, i16 16496, i16 0, i16 16496>)
; CHECK: fmov v[[REG1:[0-9]+]].2s, #3.50000000
; CHECK-NEXT: rev64 v{{[0-9]+}}.2s, v[[REG1]].2s
; CHECK-NEXT: bl f_v2i32
call i32 @f_v2i32(<2 x i32> <i32 1080033280, i32 1080033280>)
; CHECK: fmov v[[REG1:[0-9]+]].4s, #3.25000000
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 0, i8 0, i8 80, i8 64, i8 0, i8 0, i8 80, i8 64, i8 0, i8 0, i8 80, i8 64, i8 0, i8 0, i8 80, i8 64>)
; CHECK: fmov v[[REG1:[0-9]+]].4s, #3.00000000
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448>)
; CHECK: fmov v[[REG1:[0-9]+]].4s, #2.75000000
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 1076887552, i32 1076887552, i32 1076887552, i32 1076887552>)
ret void
}
; CHECK-LABEL: fmov_modimm_t12_call:
define void @fmov_modimm_t12_call() {
; CHECK: fmov v[[REG1:[0-9]+]].2d, #0.18750000
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].16b, v[[REG1]].16b
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v16i8
call i8 @f_v16i8(<16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 200, i8 63, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 200, i8 63>)
; CHECK: fmov v[[REG1:[0-9]+]].2d, #0.17968750
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].8h, v[[REG1]].8h
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v8i16
call i16 @f_v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 16327, i16 0, i16 0, i16 0, i16 16327>)
; CHECK: fmov v[[REG1:[0-9]+]].2d, #0.17187500
; CHECK-NEXT: rev64 v[[REG2:[0-9]+]].4s, v[[REG1]].4s
; CHECK-NEXT: ext v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
; CHECK-NEXT: bl f_v4i32
call i32 @f_v4i32(<4 x i32> <i32 0, i32 1069940736, i32 0, i32 1069940736>)
ret void
}
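
A quick way to exercise these checks locally, assuming a built llc and FileCheck and this test saved as aarch64-be-bv.ll (a stand-in name; the in-tree path is not shown above), is the RUN line's own command:

llc -mtriple=aarch64_be--linux-gnu < aarch64-be-bv.ll | FileCheck aarch64-be-bv.ll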