1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 13:11:39 +01:00

DAGCombine: Let truncates negate extension through extract-subvector

Summary:
Fold cases such as:
(v8i8 truncate (v8i32 extract_subvector (v16i32 sext (v16i8 V), Idx)))
->
(v8i8 extract_subvector (v16i8 V), Idx)

This can be generalized to cases where the truncate and extend do not
fully cancel each other out, but it may require querying the target
about profitability.

Reviewers: RKSimon, craig.topper, spatel, efriedma

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D41927

llvm-svn: 322300
This commit is contained in:
Zvi Rackover 2018-01-11 18:02:33 +00:00
parent c82c443401
commit 0f570e2658
2 changed files with 37 additions and 66 deletions

View File

@ -8779,6 +8779,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
}
// fold (truncate (extract_subvector(ext x))) ->
// (extract_subvector x)
// TODO: This can be generalized to cover cases where the truncate and extract
// do not fully cancel each other out.
if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::SIGN_EXTEND ||
N00.getOpcode() == ISD::ZERO_EXTEND ||
N00.getOpcode() == ISD::ANY_EXTEND) {
if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
VT.getVectorElementType())
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
N00.getOperand(0), N0.getOperand(1));
}
}
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;

View File

@ -7,26 +7,13 @@
define <4 x i32> @test1(<8 x i32> %v) {
; SSE2-LABEL: test1:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; AVX2-LABEL: test1:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX-LABEL: test1:
; AVX: # %bb.0:
; AVX-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%x = sext <8 x i32> %v to <8 x i64>
%s = shufflevector <8 x i64> %x, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%t = trunc <4 x i64> %s to <4 x i32>
@ -36,29 +23,14 @@ define <4 x i32> @test1(<8 x i32> %v) {
define <4 x i32> @test2(<8 x i32> %v) {
; SSE2-LABEL: test2:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: test2:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test2:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX-LABEL: test2:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%x = sext <8 x i32> %v to <8 x i64>
%s = shufflevector <8 x i64> %x, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%t = trunc <4 x i64> %s to <4 x i32>
@ -164,19 +136,11 @@ define <4 x i32> @test6(<8 x i32> %v) {
; SSE2: # %bb.0:
; SSE2-NEXT: retq
;
; AVX2-LABEL: test6:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test6:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX-LABEL: test6:
; AVX: # %bb.0:
; AVX-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%x = zext <8 x i32> %v to <8 x i64>
%s = shufflevector <8 x i64> %x, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%t = trunc <4 x i64> %s to <4 x i32>
@ -189,20 +153,11 @@ define <4 x i32> @test7(<8 x i32> %v) {
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: test7:
; AVX2: # %bb.0:
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test7:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
; AVX-LABEL: test7:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%x = zext <8 x i32> %v to <8 x i64>
%s = shufflevector <8 x i64> %x, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%t = trunc <4 x i64> %s to <4 x i32>