mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
[X86][AVX] Fold store(extract_element(vtrunc)) to truncated store
Add handling for storing the extracted lower (truncated bits) element from a X86ISD::VTRUNC node - this can be lowered to a generic truncated store directly. Differential Revision: https://reviews.llvm.org/D86158
This commit is contained in:
parent
1813b6efab
commit
b05b7fd391
@ -44625,6 +44625,36 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
|
||||
VT, St->getMemOperand(), DAG);
|
||||
}
|
||||
|
||||
// Try to fold an extract_element(VTRUNC) pattern into a truncating store.
|
||||
if (!St->isTruncatingStore() && StoredVal.hasOneUse()) {
|
||||
auto IsExtractedElement = [](SDValue V) {
|
||||
if (V.getOpcode() == ISD::TRUNCATE && V.getOperand(0).hasOneUse())
|
||||
V = V.getOperand(0);
|
||||
unsigned Opc = V.getOpcode();
|
||||
if (Opc == ISD::EXTRACT_VECTOR_ELT || Opc == X86ISD::PEXTRW) {
|
||||
if (V.getOperand(0).hasOneUse() && isNullConstant(V.getOperand(1)))
|
||||
return V.getOperand(0);
|
||||
}
|
||||
return SDValue();
|
||||
};
|
||||
if (SDValue Extract = IsExtractedElement(StoredVal)) {
|
||||
SDValue Trunc = peekThroughOneUseBitcasts(Extract.getOperand(0));
|
||||
if (Trunc.getOpcode() == X86ISD::VTRUNC) {
|
||||
SDValue Src = Trunc.getOperand(0);
|
||||
MVT DstVT = Trunc.getSimpleValueType();
|
||||
MVT SrcVT = Src.getSimpleValueType();
|
||||
unsigned NumSrcElts = SrcVT.getVectorNumElements();
|
||||
unsigned NumTruncBits = DstVT.getScalarSizeInBits() * NumSrcElts;
|
||||
MVT TruncVT = MVT::getVectorVT(DstVT.getScalarType(), NumSrcElts);
|
||||
if (NumTruncBits == VT.getSizeInBits() &&
|
||||
TLI.isTruncStoreLegal(SrcVT, TruncVT)) {
|
||||
return DAG.getTruncStore(St->getChain(), dl, Src, St->getBasePtr(),
|
||||
TruncVT, St->getMemOperand());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Optimize trunc store (of multiple scalars) to shuffle and store.
|
||||
// First, pack all of the elements in one place. Next, store to memory
|
||||
// in fewer chunks.
|
||||
|
@ -61,8 +61,7 @@ define void @shuffle_v16i8_to_v8i8(<16 x i8>* %L, <8 x i8>* %S) nounwind {
|
||||
; AVX512BWVL-LABEL: shuffle_v16i8_to_v8i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512BWVL-NEXT: vpmovwb %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vmovq %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: retq
|
||||
%vec = load <16 x i8>, <16 x i8>* %L
|
||||
%strided.vec = shufflevector <16 x i8> %vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
|
||||
@ -159,8 +158,7 @@ define void @shuffle_v8i16_to_v4i16(<8 x i16>* %L, <4 x i16>* %S) nounwind {
|
||||
; AVX512VL-LABEL: shuffle_v8i16_to_v4i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512VL-NEXT: vpmovdw %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovq %xmm0, (%rsi)
|
||||
; AVX512VL-NEXT: vpmovdw %xmm0, (%rsi)
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: shuffle_v8i16_to_v4i16:
|
||||
@ -173,8 +171,7 @@ define void @shuffle_v8i16_to_v4i16(<8 x i16>* %L, <4 x i16>* %S) nounwind {
|
||||
; AVX512BWVL-LABEL: shuffle_v8i16_to_v4i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512BWVL-NEXT: vpmovdw %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vmovq %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: vpmovdw %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: retq
|
||||
%vec = load <8 x i16>, <8 x i16>* %L
|
||||
%strided.vec = shufflevector <8 x i16> %vec, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
@ -338,8 +335,7 @@ define void @shuffle_v16i8_to_v4i8(<16 x i8>* %L, <4 x i8>* %S) nounwind {
|
||||
; AVX512VL-LABEL: shuffle_v16i8_to_v4i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovd %xmm0, (%rsi)
|
||||
; AVX512VL-NEXT: vpmovdb %xmm0, (%rsi)
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: shuffle_v16i8_to_v4i8:
|
||||
@ -352,8 +348,7 @@ define void @shuffle_v16i8_to_v4i8(<16 x i8>* %L, <4 x i8>* %S) nounwind {
|
||||
; AVX512BWVL-LABEL: shuffle_v16i8_to_v4i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512BWVL-NEXT: vpmovdb %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vmovd %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: vpmovdb %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: retq
|
||||
%vec = load <16 x i8>, <16 x i8>* %L
|
||||
%strided.vec = shufflevector <16 x i8> %vec, <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
|
||||
@ -456,8 +451,7 @@ define void @shuffle_v8i16_to_v2i16(<8 x i16>* %L, <2 x i16>* %S) nounwind {
|
||||
; AVX512VL-LABEL: shuffle_v8i16_to_v2i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512VL-NEXT: vpmovqw %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovd %xmm0, (%rsi)
|
||||
; AVX512VL-NEXT: vpmovqw %xmm0, (%rsi)
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: shuffle_v8i16_to_v2i16:
|
||||
@ -470,8 +464,7 @@ define void @shuffle_v8i16_to_v2i16(<8 x i16>* %L, <2 x i16>* %S) nounwind {
|
||||
; AVX512BWVL-LABEL: shuffle_v8i16_to_v2i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512BWVL-NEXT: vpmovqw %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vmovd %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: vpmovqw %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: retq
|
||||
%vec = load <8 x i16>, <8 x i16>* %L
|
||||
%strided.vec = shufflevector <8 x i16> %vec, <8 x i16> undef, <2 x i32> <i32 0, i32 4>
|
||||
@ -576,8 +569,7 @@ define void @shuffle_v16i8_to_v2i8(<16 x i8>* %L, <2 x i8>* %S) nounwind {
|
||||
; AVX512VL-LABEL: shuffle_v16i8_to_v2i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512VL-NEXT: vpmovqb %xmm0, %xmm0
|
||||
; AVX512VL-NEXT: vpextrw $0, %xmm0, (%rsi)
|
||||
; AVX512VL-NEXT: vpmovqb %xmm0, (%rsi)
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: shuffle_v16i8_to_v2i8:
|
||||
@ -590,8 +582,7 @@ define void @shuffle_v16i8_to_v2i8(<16 x i8>* %L, <2 x i8>* %S) nounwind {
|
||||
; AVX512BWVL-LABEL: shuffle_v16i8_to_v2i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512BWVL-NEXT: vpmovqb %xmm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vpextrw $0, %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: vpmovqb %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: retq
|
||||
%vec = load <16 x i8>, <16 x i8>* %L
|
||||
%strided.vec = shufflevector <16 x i8> %vec, <16 x i8> undef, <2 x i32> <i32 0, i32 8>
|
||||
|
@ -383,8 +383,7 @@ define void @shuffle_v32i8_to_v8i8(<32 x i8>* %L, <8 x i8>* %S) nounwind {
|
||||
; AVX512VL-LABEL: shuffle_v32i8_to_v8i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovq %xmm0, (%rsi)
|
||||
; AVX512VL-NEXT: vpmovdb %ymm0, (%rsi)
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
@ -399,16 +398,14 @@ define void @shuffle_v32i8_to_v8i8(<32 x i8>* %L, <8 x i8>* %S) nounwind {
|
||||
; AVX512BWVL-LABEL: shuffle_v32i8_to_v8i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovdb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vmovq %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; AVX512VBMIVL-LABEL: shuffle_v32i8_to_v8i8:
|
||||
; AVX512VBMIVL: # %bb.0:
|
||||
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX512VBMIVL-NEXT: vpmovdb %ymm0, %xmm0
|
||||
; AVX512VBMIVL-NEXT: vmovq %xmm0, (%rsi)
|
||||
; AVX512VBMIVL-NEXT: vpmovdb %ymm0, (%rsi)
|
||||
; AVX512VBMIVL-NEXT: vzeroupper
|
||||
; AVX512VBMIVL-NEXT: retq
|
||||
%vec = load <32 x i8>, <32 x i8>* %L
|
||||
@ -1067,8 +1064,7 @@ define void @shuffle_v16i16_to_v4i16(<16 x i16>* %L, <4 x i16>* %S) nounwind {
|
||||
; AVX512VL-LABEL: shuffle_v16i16_to_v4i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovq %xmm0, (%rsi)
|
||||
; AVX512VL-NEXT: vpmovqw %ymm0, (%rsi)
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
@ -1083,16 +1079,14 @@ define void @shuffle_v16i16_to_v4i16(<16 x i16>* %L, <4 x i16>* %S) nounwind {
|
||||
; AVX512BWVL-LABEL: shuffle_v16i16_to_v4i16:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vmovq %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: vpmovqw %ymm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; AVX512VBMIVL-LABEL: shuffle_v16i16_to_v4i16:
|
||||
; AVX512VBMIVL: # %bb.0:
|
||||
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX512VBMIVL-NEXT: vpmovqw %ymm0, %xmm0
|
||||
; AVX512VBMIVL-NEXT: vmovq %xmm0, (%rsi)
|
||||
; AVX512VBMIVL-NEXT: vpmovqw %ymm0, (%rsi)
|
||||
; AVX512VBMIVL-NEXT: vzeroupper
|
||||
; AVX512VBMIVL-NEXT: retq
|
||||
%vec = load <16 x i16>, <16 x i16>* %L
|
||||
@ -1178,8 +1172,7 @@ define void @shuffle_v32i8_to_v4i8(<32 x i8>* %L, <4 x i8>* %S) nounwind {
|
||||
; AVX512VL-LABEL: shuffle_v32i8_to_v4i8:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: vpmovqb %ymm0, %xmm0
|
||||
; AVX512VL-NEXT: vmovd %xmm0, (%rsi)
|
||||
; AVX512VL-NEXT: vpmovqb %ymm0, (%rsi)
|
||||
; AVX512VL-NEXT: vzeroupper
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
@ -1194,16 +1187,14 @@ define void @shuffle_v32i8_to_v4i8(<32 x i8>* %L, <4 x i8>* %S) nounwind {
|
||||
; AVX512BWVL-LABEL: shuffle_v32i8_to_v4i8:
|
||||
; AVX512BWVL: # %bb.0:
|
||||
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX512BWVL-NEXT: vpmovqb %ymm0, %xmm0
|
||||
; AVX512BWVL-NEXT: vmovd %xmm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: vpmovqb %ymm0, (%rsi)
|
||||
; AVX512BWVL-NEXT: vzeroupper
|
||||
; AVX512BWVL-NEXT: retq
|
||||
;
|
||||
; AVX512VBMIVL-LABEL: shuffle_v32i8_to_v4i8:
|
||||
; AVX512VBMIVL: # %bb.0:
|
||||
; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0
|
||||
; AVX512VBMIVL-NEXT: vpmovqb %ymm0, %xmm0
|
||||
; AVX512VBMIVL-NEXT: vmovd %xmm0, (%rsi)
|
||||
; AVX512VBMIVL-NEXT: vpmovqb %ymm0, (%rsi)
|
||||
; AVX512VBMIVL-NEXT: vzeroupper
|
||||
; AVX512VBMIVL-NEXT: retq
|
||||
%vec = load <32 x i8>, <32 x i8>* %L
|
||||
|
@ -233,8 +233,7 @@ define void @shuffle_v64i8_to_v8i8(<64 x i8>* %L, <8 x i8>* %S) nounwind {
|
||||
; AVX512-LABEL: shuffle_v64i8_to_v8i8:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
|
||||
; AVX512-NEXT: vpmovqb %zmm0, %xmm0
|
||||
; AVX512-NEXT: vmovq %xmm0, (%rsi)
|
||||
; AVX512-NEXT: vpmovqb %zmm0, (%rsi)
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
%vec = load <64 x i8>, <64 x i8>* %L
|
||||
|
Loading…
Reference in New Issue
Block a user