1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[TargetLowering] Add SimplifyDemandedBits support for ISD::INSERT_VECTOR_ELT

This helps us relax the extension of a lot of scalar elements before they are inserted into a vector.

It exposes an issue in DAGCombiner::convertBuildVecZextToZext, as some/all of the zero-extensions may be relaxed to ANY_EXTEND, so we need to handle that case to avoid a couple of AVX2 VPMOVZX test regressions.

Once this is in it should be easier to fix a number of remaining failures to fold loads into VBROADCAST nodes.

Differential Revision: https://reviews.llvm.org/D59484

llvm-svn: 356989
This commit is contained in:
Simon Pilgrim 2019-03-26 12:32:01 +00:00
parent dc4a907bf7
commit b41431be80
9 changed files with 65 additions and 31 deletions

View File

@ -16824,7 +16824,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
// Try to turn a build vector of zero extends of extract vector elts into a
// a vector zero extend and possibly an extract subvector.
// TODO: Support sign extend or any extend?
// TODO: Support sign extend?
// TODO: Allow undef elements?
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
if (LegalOperations)
@ -16832,9 +16832,12 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
EVT VT = N->getValueType(0);
bool FoundZeroExtend = false;
SDValue Op0 = N->getOperand(0);
auto checkElem = [&](SDValue Op) -> int64_t {
if (Op.getOpcode() == ISD::ZERO_EXTEND &&
unsigned Opc = Op.getOpcode();
FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
if ((Op.getOpcode() == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
@ -16866,7 +16869,8 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
SDLoc DL(N);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
Op0.getOperand(0).getOperand(1));
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
VT, In);
}
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {

View File

@ -557,6 +557,44 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero &= Known2.Zero;
}
return false; // Don't fall through, will infinitely loop.
case ISD::INSERT_VECTOR_ELT: {
SDValue Vec = Op.getOperand(0);
SDValue Scl = Op.getOperand(1);
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
EVT VecVT = Vec.getValueType();
// If index isn't constant, assume we need all vector elements AND the
// inserted element.
APInt DemandedVecElts(OriginalDemandedElts);
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
unsigned Idx = CIdx->getZExtValue();
DemandedVecElts.clearBit(Idx);
// Inserted element is not required.
if (!OriginalDemandedElts[Idx])
return TLO.CombineTo(Op, Vec);
}
KnownBits KnownScl;
unsigned NumSclBits = Scl.getScalarValueSizeInBits();
APInt DemandedSclBits = OriginalDemandedBits.zextOrTrunc(NumSclBits);
if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
return true;
Known = KnownScl.zextOrTrunc(BitWidth, false);
KnownBits KnownVec;
if (SimplifyDemandedBits(Vec, OriginalDemandedBits, DemandedVecElts,
KnownVec, TLO, Depth + 1))
return true;
if (!!DemandedVecElts) {
Known.One &= KnownVec.One;
Known.Zero &= KnownVec.Zero;
}
return false;
}
case ISD::CONCAT_VECTORS: {
Known.Zero.setAllBits();
Known.One.setAllBits();

View File

@ -144,9 +144,9 @@ define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwin
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: shrdl $30, %ecx, %eax
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: sarl $30, %ecx
; X32-NEXT: shll $2, %eax
; X32-NEXT: vmovd %eax, %xmm0
; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; X32-NEXT: vpsrlq $3, %xmm0, %xmm0

View File

@ -312,18 +312,10 @@ define <4 x i64> @_mul4xi32toi64c(<4 x i32>, <4 x i32>) {
; %ext0 = zext <2 x i32> %0 to <2 x i64>
; %ext1 = zext <2 x i32> %1 to <2 x i64>
define <2 x i64> @_mul2xi64toi64a(<2 x i64>, <2 x i64>) {
; SSE2-LABEL: _mul2xi64toi64a:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: _mul2xi64toi64a:
; SSE42: # %bb.0:
; SSE42-NEXT: pmuludq %xmm1, %xmm0
; SSE42-NEXT: retq
; SSE-LABEL: _mul2xi64toi64a:
; SSE: # %bb.0:
; SSE-NEXT: pmuludq %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _mul2xi64toi64a:
; AVX: # %bb.0:

View File

@ -1736,7 +1736,7 @@ define <16 x i8> @insert_dup_mem_v16i8_i32(i32* %ptr) {
define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movsbl (%rdi), %eax
; SSE2-NEXT: movzbl (%rdi), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
@ -1745,7 +1745,7 @@ define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
;
; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movsbl (%rdi), %eax
; SSSE3-NEXT: movzbl (%rdi), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm1
; SSSE3-NEXT: pshufb %xmm1, %xmm0
@ -1753,7 +1753,7 @@ define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
;
; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8:
; SSE41: # %bb.0:
; SSE41-NEXT: movsbl (%rdi), %eax
; SSE41-NEXT: movzbl (%rdi), %eax
; SSE41-NEXT: movd %eax, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pshufb %xmm1, %xmm0
@ -1761,7 +1761,7 @@ define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
;
; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8:
; AVX1: # %bb.0:
; AVX1-NEXT: movsbl (%rdi), %eax
; AVX1-NEXT: movzbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0

View File

@ -2652,7 +2652,7 @@ define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
; SSE-LABEL: insert_dup_mem_v8i16_sext_i16:
; SSE: # %bb.0:
; SSE-NEXT: movswl (%rdi), %eax
; SSE-NEXT: movzwl (%rdi), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
@ -2660,7 +2660,7 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
;
; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX1: # %bb.0:
; AVX1-NEXT: movswl (%rdi), %eax
; AVX1-NEXT: movzwl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
@ -2668,14 +2668,14 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
;
; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX2: # %bb.0:
; AVX2-NEXT: movswl (%rdi), %eax
; AVX2-NEXT: movzwl (%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movswl (%rdi), %eax
; AVX512VL-NEXT: movzwl (%rdi), %eax
; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
; AVX512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2

View File

@ -4722,7 +4722,7 @@ define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) {
define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX1: # %bb.0:
; AVX1-NEXT: movswl (%rdi), %eax
; AVX1-NEXT: movzwl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
@ -4731,14 +4731,14 @@ define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
;
; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX2: # %bb.0:
; AVX2-NEXT: movswl (%rdi), %eax
; AVX2-NEXT: movzwl (%rdi), %eax
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movswl (%rdi), %eax
; AVX512VL-NEXT: movzwl (%rdi), %eax
; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0
; AVX512VL-NEXT: retq
%tmp = load i16, i16* %ptr, align 2

View File

@ -3154,7 +3154,7 @@ define <32 x i8> @insert_dup_mem_v32i8_i32(i32* %ptr) {
define <32 x i8> @insert_dup_mem_v32i8_sext_i8(i8* %ptr) {
; AVX1-LABEL: insert_dup_mem_v32i8_sext_i8:
; AVX1: # %bb.0:
; AVX1-NEXT: movsbl (%rdi), %eax
; AVX1-NEXT: movzbl (%rdi), %eax
; AVX1-NEXT: vmovd %eax, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0

View File

@ -233,7 +233,7 @@ define <32 x i16> @insert_dup_mem_v32i16_i32(i32* %ptr) {
define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) {
; KNL-LABEL: insert_dup_mem_v32i16_sext_i16:
; KNL: ## %bb.0:
; KNL-NEXT: movswl (%rdi), %eax
; KNL-NEXT: movzwl (%rdi), %eax
; KNL-NEXT: vmovd %eax, %xmm0
; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
; KNL-NEXT: vmovdqa %ymm0, %ymm1
@ -241,7 +241,7 @@ define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) {
;
; SKX-LABEL: insert_dup_mem_v32i16_sext_i16:
; SKX: ## %bb.0:
; SKX-NEXT: movswl (%rdi), %eax
; SKX-NEXT: movzwl (%rdi), %eax
; SKX-NEXT: vpbroadcastw %eax, %zmm0
; SKX-NEXT: retq
%tmp = load i16, i16* %ptr, align 2