1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

Allow combining an extract_subvector into an extract_vector_elt when the narrowed extract yields a single element

Differential Revision: https://reviews.llvm.org/D73132
This commit is contained in:
Stanislav Mekhanoshin 2020-01-21 12:27:13 -08:00
parent 0fa8b03aac
commit 973468e2e7
7 changed files with 95 additions and 102 deletions

View File

@ -18581,6 +18581,13 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
}
if (NewExtNumElts == 1 &&
TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
V.getOperand(0), NewIndex);
return DAG.getBitcast(NVT, NewExtract);
}
}
}
}

View File

@ -5,9 +5,8 @@ define <4 x half> @shuffle_v4f16_23uu(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_23uu:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1
@ -19,10 +18,10 @@ define <4 x half> @shuffle_v4f16_234u(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_234u:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
; GFX9-NEXT: global_load_dwordx2 v[1:2], v[2:3], off
; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX9-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_mov_b32_e32 v0, v5
; GFX9-NEXT: v_mov_b32_e32 v1, v2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
@ -154,7 +153,7 @@ define <4 x half> @shuffle_v4f16_0101(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_0101:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
@ -181,9 +180,8 @@ define <4 x half> @shuffle_v4f16_0145(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_0145:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[1:2], v[2:3], off
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: global_load_dword v1, v[2:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
@ -196,11 +194,9 @@ define <4 x half> @shuffle_v4f16_0167(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_0167:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: global_load_dword v1, v[2:3], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[1:2], v[2:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1
@ -226,9 +222,9 @@ define <4 x half> @shuffle_v4f16_2323(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_2323:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1
@ -240,10 +236,8 @@ define <4 x half> @shuffle_v4f16_2345(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_2345:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
; GFX9-NEXT: global_load_dwordx2 v[1:2], v[2:3], off
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_mov_b32_e32 v0, v5
; GFX9-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX9-NEXT: global_load_dword v1, v[2:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
@ -256,10 +250,9 @@ define <4 x half> @shuffle_v4f16_2367(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_2367:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
; GFX9-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX9-NEXT: global_load_dword v1, v[2:3], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1
@ -271,10 +264,11 @@ define <4 x half> @shuffle_v4f16_4501(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_4501:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
; GFX9-NEXT: global_load_dword v2, v[2:3], off
; GFX9-NEXT: global_load_dword v1, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1
@ -286,10 +280,11 @@ define <4 x half> @shuffle_v4f16_4523(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_4523:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_load_dword v2, v[2:3], off
; GFX9-NEXT: global_load_dword v1, v[0:1], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1
@ -301,7 +296,7 @@ define <4 x half> @shuffle_v4f16_4545(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_4545:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
; GFX9-NEXT: global_load_dword v0, v[2:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
@ -328,11 +323,11 @@ define <4 x half> @shuffle_v4f16_6701(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_6701:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
; GFX9-NEXT: global_load_dword v2, v[2:3], off offset:4
; GFX9-NEXT: global_load_dword v1, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1
@ -344,10 +339,11 @@ define <4 x half> @shuffle_v4f16_6723(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_6723:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX9-NEXT: global_load_dword v2, v[2:3], off offset:4
; GFX9-NEXT: global_load_dword v1, v[0:1], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1
@ -373,9 +369,9 @@ define <4 x half> @shuffle_v4f16_6767(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_6767:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[2:3], off
; GFX9-NEXT: global_load_dword v0, v[2:3], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1
@ -388,13 +384,12 @@ define <4 x half> @shuffle_v4f16_2356(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff
; GFX9-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v0
; GFX9-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1
@ -407,11 +402,12 @@ define <4 x half> @shuffle_v4f16_5623(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_load_dword v1, v[0:1], off offset:4
; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: v_lshl_or_b32 v0, v3, 16, v0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1
@ -485,13 +481,12 @@ define <4 x i16> @shuffle_v4i16_2356(<4 x i16> addrspace(1)* %arg0, <4 x i16> ad
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[2:3], v[2:3], off
; GFX9-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff
; GFX9-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v0
; GFX9-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x i16>, <4 x i16> addrspace(1)* %arg0
%val1 = load <4 x i16>, <4 x i16> addrspace(1)* %arg1
@ -503,11 +498,9 @@ define <4 x i16> @shuffle_v4i16_0167(<4 x i16> addrspace(1)* %arg0, <4 x i16> ad
; GFX9-LABEL: shuffle_v4i16_0167:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: global_load_dword v0, v[0:1], off
; GFX9-NEXT: global_load_dword v1, v[2:3], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[1:2], v[2:3], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x i16>, <4 x i16> addrspace(1)* %arg0
%val1 = load <4 x i16>, <4 x i16> addrspace(1)* %arg1
@ -590,12 +583,11 @@ define <4 x half> @shuffle_v4f16_2333(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_2333:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[1:2], v[0:1], off
; GFX9-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v2
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v0, 16, v1
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1
@ -607,12 +599,11 @@ define <4 x half> @shuffle_v4f16_6667(<4 x half> addrspace(1)* %arg0, <4 x half>
; GFX9-LABEL: shuffle_v4f16_6667:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dwordx2 v[1:2], v[0:1], off
; GFX9-NEXT: global_load_dword v0, v[0:1], off offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v2
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v0, 16, v1
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX9-NEXT: v_and_b32_e32 v2, 0xffff, v1
; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0
%val1 = load <4 x half>, <4 x half> addrspace(1)* %arg1

View File

@ -429,8 +429,8 @@ define <4 x i32> @tduplane(<4 x i32> %invec) {
define <2 x float> @check_f32(<4 x float> %v) nounwind {
; CHECK-LABEL: check_f32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov d17, r2, r3
; CHECK-NEXT: vdup.32 d16, d17[1]
; CHECK-NEXT: vmov d16, r2, r3
; CHECK-NEXT: vdup.32 d16, d16[1]
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
%x = extractelement <4 x float> %v, i32 3
@ -442,8 +442,8 @@ define <2 x float> @check_f32(<4 x float> %v) nounwind {
define <2 x i32> @check_i32(<4 x i32> %v) nounwind {
; CHECK-LABEL: check_i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov d17, r2, r3
; CHECK-NEXT: vdup.32 d16, d17[1]
; CHECK-NEXT: vmov d16, r2, r3
; CHECK-NEXT: vdup.32 d16, d16[1]
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
%x = extractelement <4 x i32> %v, i32 3

View File

@ -183,10 +183,10 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-NEXT: vext.16 d16, d16, d17, #3
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vuzp.16 d16, d17
; CHECK-NEXT: vzip.16 d16, d18
; CHECK-NEXT: vorr d18, d16, d16
; CHECK-NEXT: vldr d17, [r1]
; CHECK-NEXT: vuzp.16 d16, d18
; CHECK-NEXT: vzip.16 d16, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
@ -216,17 +216,15 @@ define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind {
; CHECK-LABEL: test_multisource:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d18, [r0, #32]
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: add r2, r0, #48
; CHECK-NEXT: add r0, r0, #32
; CHECK-NEXT: vorr d22, d18, d18
; CHECK-NEXT: vld1.16 {d16, d17}, [r1:128]!
; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128]
; CHECK-NEXT: vorr d24, d20, d20
; CHECK-NEXT: vld1.64 {d18, d19}, [r2:128]
; CHECK-NEXT: vld1.64 {d22, d23}, [r1:128]
; CHECK-NEXT: vzip.16 d24, d18
; CHECK-NEXT: vtrn.16 q8, q11
; CHECK-NEXT: vext.16 d18, d20, d24, #2
; CHECK-NEXT: vldr d19, [r0, #48]
; CHECK-NEXT: vld1.64 {d20, d21}, [r1:128]
; CHECK-NEXT: vzip.16 d22, d19
; CHECK-NEXT: vtrn.16 q8, q10
; CHECK-NEXT: vext.16 d18, d18, d22, #2
; CHECK-NEXT: vext.16 d16, d18, d16, #2
; CHECK-NEXT: vext.16 d16, d16, d16, #2
; CHECK-NEXT: vmov r0, r1, d16

View File

@ -285,11 +285,11 @@ define void @addCombineToVPADDLq_s8(<16 x i8> *%cbcr, <8 x i16> *%X) nounwind ss
define void @addCombineToVPADDL_s8(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp {
; CHECK-LABEL: addCombineToVPADDL_s8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-NEXT: vext.8 d18, d16, d16, #1
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vext.8 d17, d16, d16, #1
; CHECK-NEXT: vshl.i16 d16, d16, #8
; CHECK-NEXT: vshl.i16 d18, d18, #8
; CHECK-NEXT: vshr.s16 d17, d18, #8
; CHECK-NEXT: vshl.i16 d17, d17, #8
; CHECK-NEXT: vshr.s16 d17, d17, #8
; CHECK-NEXT: vsra.s16 d17, d16, #8
; CHECK-NEXT: vstr d17, [r1]
; CHECK-NEXT: mov pc, lr
@ -347,11 +347,11 @@ define void @addCombineToVPADDLq_u8_early_zext(<16 x i8> *%cbcr, <8 x i16> *%X)
define void @addCombineToVPADDL_u8(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp {
; CHECK-LABEL: addCombineToVPADDL_u8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-NEXT: vext.8 d18, d16, d16, #1
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vext.8 d17, d16, d16, #1
; CHECK-NEXT: vbic.i16 d16, #0xff00
; CHECK-NEXT: vbic.i16 d18, #0xff00
; CHECK-NEXT: vadd.i16 d16, d18, d16
; CHECK-NEXT: vbic.i16 d17, #0xff00
; CHECK-NEXT: vadd.i16 d16, d17, d16
; CHECK-NEXT: vstr d16, [r1]
; CHECK-NEXT: mov pc, lr
%tmp = load <16 x i8>, <16 x i8>* %cbcr
@ -368,7 +368,7 @@ define void @addCombineToVPADDL_u8(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp
define void @addCombineToVPADDL_u8_early_zext(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp {
; CHECK-LABEL: addCombineToVPADDL_u8_early_zext:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vmovl.u8 q8, d16
; CHECK-NEXT: vpadd.i16 d16, d16, d17
; CHECK-NEXT: vstr d16, [r1]

View File

@ -522,12 +522,11 @@ define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
define %struct.uint8x8x2_t @vuzp_extract_subvector(<16 x i8> %t) #0 {
; CHECK-LABEL: vuzp_extract_subvector:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov d17, r2, r3
; CHECK-NEXT: vmov d16, r0, r1
; CHECK-NEXT: vorr d18, d17, d17
; CHECK-NEXT: vuzp.8 d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d18
; CHECK-NEXT: vmov d16, r2, r3
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vuzp.8 d17, d16
; CHECK-NEXT: vmov r0, r1, d17
; CHECK-NEXT: vmov r2, r3, d16
; CHECK-NEXT: mov pc, lr
%vuzp.i = shufflevector <16 x i8> %t, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>

View File

@ -97,9 +97,8 @@ define <2 x i64> @pmovzxbq_1() nounwind {
; X86-AVX512: ## %bb.0: ## %entry
; X86-AVX512-NEXT: movl L_g16$non_lazy_ptr, %eax ## encoding: [0xa1,A,A,A,A]
; X86-AVX512-NEXT: ## fixup A - offset: 1, value: L_g16$non_lazy_ptr, kind: FK_Data_4
; X86-AVX512-NEXT: vpbroadcastw (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x00]
; X86-AVX512-NEXT: vpmovzxbq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; X86-AVX512-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX512-NEXT: vpmovzxbq (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0x00]
; X86-AVX512-NEXT: ## xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: pmovzxbq_1:
@ -122,9 +121,8 @@ define <2 x i64> @pmovzxbq_1() nounwind {
; X64-AVX512: ## %bb.0: ## %entry
; X64-AVX512-NEXT: movq _g16@{{.*}}(%rip), %rax ## encoding: [0x48,0x8b,0x05,A,A,A,A]
; X64-AVX512-NEXT: ## fixup A - offset: 3, value: _g16@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
; X64-AVX512-NEXT: vpbroadcastw (%rax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x00]
; X64-AVX512-NEXT: vpmovzxbq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; X64-AVX512-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X64-AVX512-NEXT: vpmovzxbq (%rax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0x00]
; X64-AVX512-NEXT: ## xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; X64-AVX512-NEXT: retq ## encoding: [0xc3]
entry:
%0 = load i16, i16* @g16, align 2 ; <i16> [#uses=1]