1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[DAGCombine] Initialize the default operation action for SIGN_EXTEND_INREG for vector type as 'expand' instead of 'legal'

Previously, we did not set the default operation action for SIGN_EXTEND_INREG for
vector types, so it defaulted to 0, i.e. Legal. However, most targets do not
have native instructions to support this opcode. It should be set to Expand by
default, as was done for ANY_EXTEND_VECTOR_INREG.

Differential Revision: https://reviews.llvm.org/D70000
This commit is contained in:
QingShan Zhang 2020-01-03 03:26:41 +00:00
parent 6277397419
commit 0a7fe2ac65
8 changed files with 403 additions and 9 deletions

View File

@ -696,6 +696,7 @@ void TargetLoweringBase::initActions() {
// These operations default to expand for vector types.
if (VT.isVector()) {
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);

View File

@ -189,6 +189,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
}
for (auto VT :
{ MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
}
// Compute derived properties from the register classes

View File

@ -379,6 +379,13 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
// It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Legal);
// Some truncating stores are legal too.
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);

View File

@ -1536,6 +1536,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
// Types natively supported:
for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {

View File

@ -194,6 +194,13 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::XOR, BoolV, Legal);
}
if (Use64b)
for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
else
for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
setTargetDAGCombine(ISD::VSELECT);
}

View File

@ -1,14 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=armv8 -mattr=+mve | FileCheck %s
; RUN: llc < %s -mtriple=armv8 | FileCheck %s
define <4 x i32> @test(<4 x i32> %m) {
; CHECK-LABEL: test:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov d1, r2, r3
; CHECK-NEXT: vmov d0, r0, r1
; CHECK-NEXT: vshl.i32 q0, q0, #24
; CHECK-NEXT: vshr.s32 q0, q0, #24
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: vmov d17, r2, r3
; CHECK-NEXT: vmov d16, r0, r1
; CHECK-NEXT: vshl.i32 q8, q8, #24
; CHECK-NEXT: vshr.s32 q8, q8, #24
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: bx lr
entry:
%shl = shl <4 x i32> %m, <i32 24, i32 24, i32 24, i32 24>

View File

@ -0,0 +1,272 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=hexagon | FileCheck %s
; RUN: llc < %s -march=hexagon -mattr=+hvx,hvx-length64b | FileCheck %s --check-prefix=CHECK-64B
; RUN: llc < %s -march=hexagon -mattr=+hvx,hvx-length128b | FileCheck %s --check-prefix=CHECK-128B
define <2 x i32> @test1(<2 x i32> %m) {
; CHECK-LABEL: test1:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: r1 = extract(r1,#8,#0)
; CHECK-NEXT: r0 = sxtb(r0)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: }
;
; CHECK-64B-LABEL: test1:
; CHECK-64B: .cfi_startproc
; CHECK-64B-NEXT: // %bb.0: // %entry
; CHECK-64B-NEXT: {
; CHECK-64B-NEXT: r1 = extract(r1,#8,#0)
; CHECK-64B-NEXT: r0 = sxtb(r0)
; CHECK-64B-NEXT: jumpr r31
; CHECK-64B-NEXT: }
;
; CHECK-128B-LABEL: test1:
; CHECK-128B: .cfi_startproc
; CHECK-128B-NEXT: // %bb.0: // %entry
; CHECK-128B-NEXT: {
; CHECK-128B-NEXT: r1 = extract(r1,#8,#0)
; CHECK-128B-NEXT: r0 = sxtb(r0)
; CHECK-128B-NEXT: jumpr r31
; CHECK-128B-NEXT: }
entry:
%shl = shl <2 x i32> %m, <i32 24, i32 24>
%shr = ashr exact <2 x i32> %shl, <i32 24, i32 24>
ret <2 x i32> %shr
}
define <16 x i32> @test2(<16 x i32> %m) {
; CHECK-LABEL: test2:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: r3 = extract(r3,#8,#0)
; CHECK-NEXT: r29 = add(r29,#-8)
; CHECK-NEXT: r2 = sxtb(r2)
; CHECK-NEXT: r4 = sxtb(r4)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r5 = extract(r5,#8,#0)
; CHECK-NEXT: r13:12 = memd(r29+#48)
; CHECK-NEXT: memd(r29+#0) = r17:16
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r13 = extract(r13,#8,#0)
; CHECK-NEXT: r12 = sxtb(r12)
; CHECK-NEXT: r15:14 = memd(r29+#40)
; CHECK-NEXT: r9:8 = memd(r29+#32)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r9 = extract(r9,#8,#0)
; CHECK-NEXT: r8 = sxtb(r8)
; CHECK-NEXT: r11:10 = memd(r29+#24)
; CHECK-NEXT: r7:6 = memd(r29+#16)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r11 = extract(r11,#8,#0)
; CHECK-NEXT: r10 = sxtb(r10)
; CHECK-NEXT: r14 = sxtb(r14)
; CHECK-NEXT: r17:16 = memd(r29+#8)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r15 = extract(r15,#8,#0)
; CHECK-NEXT: r17 = extract(r17,#8,#0)
; CHECK-NEXT: r16 = sxtb(r16)
; CHECK-NEXT: r6 = sxtb(r6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r7 = extract(r7,#8,#0)
; CHECK-NEXT: memd(r0+#56) = r13:12
; CHECK-NEXT: memd(r0+#48) = r15:14
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memd(r0+#40) = r9:8
; CHECK-NEXT: memd(r0+#32) = r11:10
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memd(r0+#24) = r7:6
; CHECK-NEXT: memd(r0+#16) = r17:16
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: memd(r0+#8) = r5:4
; CHECK-NEXT: memd(r0+#0) = r3:2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r29 = add(r29,#8)
; CHECK-NEXT: r17:16 = memd(r29+#0)
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: } // 8-byte Folded Reload
;
; CHECK-64B-LABEL: test2:
; CHECK-64B: .cfi_startproc
; CHECK-64B-NEXT: // %bb.0: // %entry
; CHECK-64B-NEXT: {
; CHECK-64B-NEXT: r0 = #24
; CHECK-64B-NEXT: }
; CHECK-64B-NEXT: {
; CHECK-64B-NEXT: v0.w = vasl(v0.w,r0)
; CHECK-64B-NEXT: }
; CHECK-64B-NEXT: {
; CHECK-64B-NEXT: v0.w = vasr(v0.w,r0)
; CHECK-64B-NEXT: jumpr r31
; CHECK-64B-NEXT: }
;
; CHECK-128B-LABEL: test2:
; CHECK-128B: .cfi_startproc
; CHECK-128B-NEXT: // %bb.0: // %entry
; CHECK-128B-NEXT: {
; CHECK-128B-NEXT: r0 = #24
; CHECK-128B-NEXT: }
; CHECK-128B-NEXT: {
; CHECK-128B-NEXT: v0.w = vasl(v0.w,r0)
; CHECK-128B-NEXT: }
; CHECK-128B-NEXT: {
; CHECK-128B-NEXT: v0.w = vasr(v0.w,r0)
; CHECK-128B-NEXT: jumpr r31
; CHECK-128B-NEXT: }
entry:
%shl = shl <16 x i32> %m, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
%shr = ashr exact <16 x i32> %shl, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
ret <16 x i32> %shr
}
define <64 x i16> @test3(<64 x i16> %m) {
; CHECK-LABEL: test3:
; CHECK: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: {
; CHECK-NEXT: r3:2 = vaslh(r3:2,#8)
; CHECK-NEXT: r5:4 = vaslh(r5:4,#8)
; CHECK-NEXT: r9:8 = memd(r29+#96)
; CHECK-NEXT: r11:10 = memd(r29+#88)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r13:12 = vaslh(r9:8,#8)
; CHECK-NEXT: r11:10 = vaslh(r11:10,#8)
; CHECK-NEXT: r9:8 = memd(r29+#80)
; CHECK-NEXT: r7:6 = memd(r29+#104)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r15:14 = vaslh(r7:6,#8)
; CHECK-NEXT: r9:8 = vaslh(r9:8,#8)
; CHECK-NEXT: r7:6 = memd(r29+#72)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r15:14 = vasrh(r15:14,#8)
; CHECK-NEXT: r13:12 = vasrh(r13:12,#8)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r11:10 = vasrh(r11:10,#8)
; CHECK-NEXT: r9:8 = vasrh(r9:8,#8)
; CHECK-NEXT: r15:14 = memd(r29+#64)
; CHECK-NEXT: memd(r0+#120) = r15:14
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r7:6 = vaslh(r7:6,#8)
; CHECK-NEXT: r15:14 = vaslh(r15:14,#8)
; CHECK-NEXT: r13:12 = memd(r29+#56)
; CHECK-NEXT: memd(r0+#112) = r13:12
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r13:12 = vaslh(r13:12,#8)
; CHECK-NEXT: r7:6 = vasrh(r7:6,#8)
; CHECK-NEXT: r11:10 = memd(r29+#48)
; CHECK-NEXT: memd(r0+#104) = r11:10
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r11:10 = vaslh(r11:10,#8)
; CHECK-NEXT: r15:14 = vasrh(r15:14,#8)
; CHECK-NEXT: r9:8 = memd(r29+#40)
; CHECK-NEXT: memd(r0+#96) = r9:8
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r9:8 = vaslh(r9:8,#8)
; CHECK-NEXT: r13:12 = vasrh(r13:12,#8)
; CHECK-NEXT: r7:6 = memd(r29+#32)
; CHECK-NEXT: memd(r0+#88) = r7:6
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r11:10 = vasrh(r11:10,#8)
; CHECK-NEXT: r9:8 = vasrh(r9:8,#8)
; CHECK-NEXT: r15:14 = memd(r29+#0)
; CHECK-NEXT: memd(r0+#80) = r15:14
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r7:6 = vaslh(r7:6,#8)
; CHECK-NEXT: r15:14 = vaslh(r15:14,#8)
; CHECK-NEXT: r13:12 = memd(r29+#16)
; CHECK-NEXT: memd(r0+#72) = r13:12
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r13:12 = vaslh(r13:12,#8)
; CHECK-NEXT: r7:6 = vasrh(r7:6,#8)
; CHECK-NEXT: r11:10 = memd(r29+#24)
; CHECK-NEXT: memd(r0+#64) = r11:10
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r11:10 = vaslh(r11:10,#8)
; CHECK-NEXT: r3:2 = vasrh(r3:2,#8)
; CHECK-NEXT: r9:8 = memd(r29+#8)
; CHECK-NEXT: memd(r0+#56) = r9:8
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r9:8 = vaslh(r9:8,#8)
; CHECK-NEXT: r13:12 = vasrh(r13:12,#8)
; CHECK-NEXT: memd(r0+#48) = r7:6
; CHECK-NEXT: memd(r0+#0) = r3:2
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r11:10 = vasrh(r11:10,#8)
; CHECK-NEXT: r7:6 = vasrh(r15:14,#8)
; CHECK-NEXT: memd(r0+#32) = r13:12
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r9:8 = vasrh(r9:8,#8)
; CHECK-NEXT: r5:4 = vasrh(r5:4,#8)
; CHECK-NEXT: memd(r0+#40) = r11:10
; CHECK-NEXT: memd(r0+#16) = r7:6
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: jumpr r31
; CHECK-NEXT: memd(r0+#24) = r9:8
; CHECK-NEXT: memd(r0+#8) = r5:4
; CHECK-NEXT: }
;
; CHECK-64B-LABEL: test3:
; CHECK-64B: .cfi_startproc
; CHECK-64B-NEXT: // %bb.0: // %entry
; CHECK-64B-NEXT: {
; CHECK-64B-NEXT: r0 = #8
; CHECK-64B-NEXT: }
; CHECK-64B-NEXT: {
; CHECK-64B-NEXT: v0.h = vasl(v0.h,r0)
; CHECK-64B-NEXT: }
; CHECK-64B-NEXT: {
; CHECK-64B-NEXT: v1.h = vasl(v1.h,r0)
; CHECK-64B-NEXT: }
; CHECK-64B-NEXT: {
; CHECK-64B-NEXT: v0.h = vasr(v0.h,r0)
; CHECK-64B-NEXT: }
; CHECK-64B-NEXT: {
; CHECK-64B-NEXT: v1.h = vasr(v1.h,r0)
; CHECK-64B-NEXT: jumpr r31
; CHECK-64B-NEXT: }
;
; CHECK-128B-LABEL: test3:
; CHECK-128B: .cfi_startproc
; CHECK-128B-NEXT: // %bb.0: // %entry
; CHECK-128B-NEXT: {
; CHECK-128B-NEXT: r0 = #8
; CHECK-128B-NEXT: }
; CHECK-128B-NEXT: {
; CHECK-128B-NEXT: v0.h = vasl(v0.h,r0)
; CHECK-128B-NEXT: }
; CHECK-128B-NEXT: {
; CHECK-128B-NEXT: v0.h = vasr(v0.h,r0)
; CHECK-128B-NEXT: jumpr r31
; CHECK-128B-NEXT: }
entry:
%shl = shl <64 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%shr = ashr exact <64 x i16> %shl, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
ret <64 x i16> %shr
}

View File

@ -1,6 +1,104 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i1(<4 x i32> %m) {
; CHECK-LABEL: sext_v4i32_v4i32_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vshl.i32 q0, q0, #31
; CHECK-NEXT: vshr.s32 q0, q0, #31
; CHECK-NEXT: bx lr
entry:
%shl = shl <4 x i32> %m, <i32 31, i32 31, i32 31, i32 31>
%shr = ashr exact <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
ret <4 x i32> %shr
}
define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i8(<4 x i32> %m) {
; CHECK-LABEL: sext_v4i32_v4i32_v4i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlb.s8 q0, q0
; CHECK-NEXT: vmovlb.s16 q0, q0
; CHECK-NEXT: bx lr
entry:
%shl = shl <4 x i32> %m, <i32 24, i32 24, i32 24, i32 24>
%shr = ashr exact <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
ret <4 x i32> %shr
}
define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i16(<4 x i32> %m) {
; CHECK-LABEL: sext_v4i32_v4i32_v4i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlb.s16 q0, q0
; CHECK-NEXT: bx lr
entry:
%shl = shl <4 x i32> %m, <i32 16, i32 16, i32 16, i32 16>
%shr = ashr exact <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
ret <4 x i32> %shr
}
define arm_aapcs_vfpcc <8 x i16> @sext_v8i16_v8i16_v8i8(<8 x i16> %m) {
; CHECK-LABEL: sext_v8i16_v8i16_v8i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlb.s8 q0, q0
; CHECK-NEXT: bx lr
entry:
%shl = shl <8 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%shr = ashr exact <8 x i16> %shl, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
ret <8 x i16> %shr
}
define arm_aapcs_vfpcc <8 x i16> @sext_v8i16_v8i16_v8i1(<8 x i16> %m) {
; CHECK-LABEL: sext_v8i16_v8i16_v8i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vshl.i16 q0, q0, #15
; CHECK-NEXT: vshr.s16 q0, q0, #15
; CHECK-NEXT: bx lr
entry:
%shl = shl <8 x i16> %m, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%shr = ashr exact <8 x i16> %shl, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
ret <8 x i16> %shr
}
define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i32(<2 x i64> %m) {
; CHECK-LABEL: sext_v2i64_v2i64_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%shl = shl <2 x i64> %m, <i64 32, i64 32>
%shr = ashr exact <2 x i64> %shl, <i64 32, i64 32>
ret <2 x i64> %shr
}
define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i35(<2 x i64> %m) {
; CHECK-LABEL: sext_v2i64_v2i64_v2i35:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: sbfx r0, r0, #0, #3
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: sbfx r0, r0, #0, #3
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%shl = shl <2 x i64> %m, <i64 29, i64 29>
%shr = ashr exact <2 x i64> %shl, <i64 29, i64 29>
ret <2 x i64> %shr
}
define arm_aapcs_vfpcc <8 x i16> @sext_v8i8_v8i16(<8 x i8> %src) {
; CHECK-LABEL: sext_v8i8_v8i16:
; CHECK: @ %bb.0: @ %entry
@ -332,13 +430,13 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) {
; CHECK-LABEL: zext_v2i32_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI13_0
; CHECK-NEXT: adr r0, .LCPI20_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI13_0:
; CHECK-NEXT: .LCPI20_0:
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 4294967295 @ 0xffffffff