
[X86] Improve folding of concat_vectors of subvectors from the same broadcast

Handle concat_vectors(extract_subvector(broadcast(x)), extract_subvector(broadcast(x))) -> broadcast(x)

To expose this, we also need collectConcatOps to recognise the insert_subvector(x, extract_subvector(x, lo), hi) subvector splat pattern.
Simon Pilgrim 2020-05-01 11:22:57 +01:00
parent 40096a8dfd
commit fe80062bb3
2 changed files with 44 additions and 56 deletions
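For context, the DAG pattern being folded typically arises from a vector bitwise-select whose mask is a splatted scalar load, as in the bitselect tests updated below. The following is a minimal LLVM IR sketch of that shape, hand-written for illustration; the function name and exact operand order are assumptions, not copied from the test file:

define <4 x i64> @bitselect_splat_mask(<4 x i64> %a0, <4 x i64> %a1, i64* %p) {
  ; Load a scalar and splat it across all lanes; on x86-64 this splat
  ; lowers to a vbroadcastsd from memory.
  %m = load i64, i64* %p
  %v = insertelement <4 x i64> undef, i64 %m, i32 0
  %mask = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> zeroinitializer
  ; Bitwise select: (a0 & mask) | (a1 & ~mask).
  %notmask = xor <4 x i64> %mask, <i64 -1, i64 -1, i64 -1, i64 -1>
  %t = and <4 x i64> %a0, %mask
  %f = and <4 x i64> %a1, %notmask
  %r = or <4 x i64> %t, %f
  ret <4 x i64> %r
}

On AVX1/XOP the ~mask side previously rebuilt the 256-bit splat from the broadcast's low 128 bits (the extra vinsertf128 in the old check lines); recognising that rebuilt splat as a concat of subvectors of the same broadcast lets the combine return the original broadcast, so the XOP path collapses to a single vpcmov.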

llvm/lib/Target/X86/X86ISelLowering.cpp

@@ -5728,13 +5728,21 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
     // TODO - Handle more general insert_subvector chains.
     if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
-        Idx == (VT.getVectorNumElements() / 2) &&
-        Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
-        Src.getOperand(1).getValueType() == SubVT &&
-        isNullConstant(Src.getOperand(2))) {
-      Ops.push_back(Src.getOperand(1));
-      Ops.push_back(Sub);
-      return true;
+        Idx == (VT.getVectorNumElements() / 2)) {
+      // insert_subvector(insert_subvector(undef, x, lo), y, hi)
+      if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+          Src.getOperand(1).getValueType() == SubVT &&
+          isNullConstant(Src.getOperand(2))) {
+        Ops.push_back(Src.getOperand(1));
+        Ops.push_back(Sub);
+        return true;
+      }
+      // insert_subvector(x, extract_subvector(x, lo), hi)
+      if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+          Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) {
+        Ops.append(2, Sub);
+        return true;
+      }
     }
   }
@@ -46679,6 +46687,15 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
          (EltSizeInBits >= 32 && MayFoldLoad(Op0.getOperand(0)))) &&
         Op0.getOperand(0).getValueType() == VT.getScalarType())
       return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));
+
+    // concat_vectors(extract_subvector(broadcast(x)),
+    //                extract_subvector(broadcast(x))) -> broadcast(x)
+    if (Op0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+        Op0.getOperand(0).getValueType() == VT) {
+      if (Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST ||
+          Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD)
+        return Op0.getOperand(0);
+    }
   }

   // Repeated opcode.

llvm/test/CodeGen/X86/bitselect.ll

@@ -590,28 +590,16 @@ define <4 x i64> @bitselect_v4i64_broadcast_rrm(<4 x i64> %a0, <4 x i64> %a1, i6
 ; XOP-LABEL: bitselect_v4i64_broadcast_rrm:
 ; XOP: # %bb.0:
 ; XOP-NEXT: vbroadcastsd (%rdi), %ymm2
-; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm3
-; XOP-NEXT: vandps %ymm2, %ymm0, %ymm0
-; XOP-NEXT: vandnps %ymm1, %ymm3, %ymm1
-; XOP-NEXT: vorps %ymm1, %ymm0, %ymm0
+; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
 ; XOP-NEXT: retq
 ;
-; AVX1-LABEL: bitselect_v4i64_broadcast_rrm:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastsd (%rdi), %ymm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm3
-; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vandnps %ymm1, %ymm3, %ymm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: bitselect_v4i64_broadcast_rrm:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm2
-; AVX2-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vandnps %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: bitselect_v4i64_broadcast_rrm:
+; AVX: # %bb.0:
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm2
+; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vandnps %ymm1, %ymm2, %ymm1
+; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
 ;
 ; AVX512F-LABEL: bitselect_v4i64_broadcast_rrm:
 ; AVX512F: # %bb.0:
@@ -986,37 +974,20 @@ define <8 x i64> @bitselect_v8i64_broadcast_rrm(<8 x i64> %a0, <8 x i64> %a1, i6
 ; XOP-LABEL: bitselect_v8i64_broadcast_rrm:
 ; XOP: # %bb.0:
 ; XOP-NEXT: vbroadcastsd (%rdi), %ymm4
-; XOP-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm5
-; XOP-NEXT: vandps %ymm4, %ymm1, %ymm1
-; XOP-NEXT: vandps %ymm4, %ymm0, %ymm0
-; XOP-NEXT: vandnps %ymm3, %ymm5, %ymm3
-; XOP-NEXT: vorps %ymm3, %ymm1, %ymm1
-; XOP-NEXT: vandnps %ymm2, %ymm5, %ymm2
-; XOP-NEXT: vorps %ymm2, %ymm0, %ymm0
+; XOP-NEXT: vpcmov %ymm4, %ymm2, %ymm0, %ymm0
+; XOP-NEXT: vpcmov %ymm4, %ymm3, %ymm1, %ymm1
 ; XOP-NEXT: retq
 ;
-; AVX1-LABEL: bitselect_v8i64_broadcast_rrm:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vbroadcastsd (%rdi), %ymm4
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm5
-; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1
-; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0
-; AVX1-NEXT: vandnps %ymm3, %ymm5, %ymm3
-; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vandnps %ymm2, %ymm5, %ymm2
-; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: bitselect_v8i64_broadcast_rrm:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vbroadcastsd (%rdi), %ymm4
-; AVX2-NEXT: vandps %ymm4, %ymm1, %ymm1
-; AVX2-NEXT: vandps %ymm4, %ymm0, %ymm0
-; AVX2-NEXT: vandnps %ymm3, %ymm4, %ymm3
-; AVX2-NEXT: vorps %ymm3, %ymm1, %ymm1
-; AVX2-NEXT: vandnps %ymm2, %ymm4, %ymm2
-; AVX2-NEXT: vorps %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: bitselect_v8i64_broadcast_rrm:
+; AVX: # %bb.0:
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm4
+; AVX-NEXT: vandps %ymm4, %ymm1, %ymm1
+; AVX-NEXT: vandps %ymm4, %ymm0, %ymm0
+; AVX-NEXT: vandnps %ymm3, %ymm4, %ymm3
+; AVX-NEXT: vorps %ymm3, %ymm1, %ymm1
+; AVX-NEXT: vandnps %ymm2, %ymm4, %ymm2
+; AVX-NEXT: vorps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: bitselect_v8i64_broadcast_rrm:
 ; AVX512: # %bb.0: