mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[TargetLowering] Add SimplifyDemandedBits support for ISD::INSERT_VECTOR_ELT
This helps us relax the extension of a lot of scalar elements before they are inserted into a vector. It exposes an issue in DAGCombiner::convertBuildVecZextToZext as some/all of the zero-extensions may be relaxed to ANY_EXTEND, so we need to handle that case to avoid a couple of AVX2 VPMOVZX test regressions. Once this is in it should be easier to fix a number of remaining failures to fold loads into VBROADCAST nodes. Differential Revision: https://reviews.llvm.org/D59484 llvm-svn: 356989
This commit is contained in:
parent
dc4a907bf7
commit
b41431be80
@@ -16824,7 +16824,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
|
||||
|
||||
// Try to turn a build vector of zero extends of extract vector elts into a
|
||||
// a vector zero extend and possibly an extract subvector.
|
||||
// TODO: Support sign extend or any extend?
|
||||
// TODO: Support sign extend?
|
||||
// TODO: Allow undef elements?
|
||||
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
|
||||
if (LegalOperations)
|
||||
@@ -16832,9 +16832,12 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
bool FoundZeroExtend = false;
|
||||
SDValue Op0 = N->getOperand(0);
|
||||
auto checkElem = [&](SDValue Op) -> int64_t {
|
||||
if (Op.getOpcode() == ISD::ZERO_EXTEND &&
|
||||
unsigned Opc = Op.getOpcode();
|
||||
FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
|
||||
if ((Op.getOpcode() == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
|
||||
Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
|
||||
Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
|
||||
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
|
||||
@@ -16866,7 +16869,8 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
|
||||
SDLoc DL(N);
|
||||
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
|
||||
Op0.getOperand(0).getOperand(1));
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
|
||||
return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
|
||||
VT, In);
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
|
||||
|
@@ -557,6 +557,44 @@ bool TargetLowering::SimplifyDemandedBits(
|
||||
Known.Zero &= Known2.Zero;
|
||||
}
|
||||
return false; // Don't fall through, will infinitely loop.
|
||||
case ISD::INSERT_VECTOR_ELT: {
|
||||
SDValue Vec = Op.getOperand(0);
|
||||
SDValue Scl = Op.getOperand(1);
|
||||
auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
|
||||
EVT VecVT = Vec.getValueType();
|
||||
|
||||
// If index isn't constant, assume we need all vector elements AND the
|
||||
// inserted element.
|
||||
APInt DemandedVecElts(OriginalDemandedElts);
|
||||
if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
|
||||
unsigned Idx = CIdx->getZExtValue();
|
||||
DemandedVecElts.clearBit(Idx);
|
||||
|
||||
// Inserted element is not required.
|
||||
if (!OriginalDemandedElts[Idx])
|
||||
return TLO.CombineTo(Op, Vec);
|
||||
}
|
||||
|
||||
KnownBits KnownScl;
|
||||
unsigned NumSclBits = Scl.getScalarValueSizeInBits();
|
||||
APInt DemandedSclBits = OriginalDemandedBits.zextOrTrunc(NumSclBits);
|
||||
if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
|
||||
return true;
|
||||
|
||||
Known = KnownScl.zextOrTrunc(BitWidth, false);
|
||||
|
||||
KnownBits KnownVec;
|
||||
if (SimplifyDemandedBits(Vec, OriginalDemandedBits, DemandedVecElts,
|
||||
KnownVec, TLO, Depth + 1))
|
||||
return true;
|
||||
|
||||
if (!!DemandedVecElts) {
|
||||
Known.One &= KnownVec.One;
|
||||
Known.Zero &= KnownVec.Zero;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
case ISD::CONCAT_VECTORS: {
|
||||
Known.Zero.setAllBits();
|
||||
Known.One.setAllBits();
|
||||
|
@@ -144,9 +144,9 @@ define float @signbits_ashr_insert_ashr_extract_sitofp(i64 %a0, i64 %a1) nounwin
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: pushl %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: shrdl $30, %ecx, %eax
|
||||
; X32-NEXT: movl %eax, %ecx
|
||||
; X32-NEXT: sarl $30, %ecx
|
||||
; X32-NEXT: shll $2, %eax
|
||||
; X32-NEXT: vmovd %eax, %xmm0
|
||||
; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; X32-NEXT: vpsrlq $3, %xmm0, %xmm0
|
||||
|
@@ -312,18 +312,10 @@ define <4 x i64> @_mul4xi32toi64c(<4 x i32>, <4 x i32>) {
|
||||
; %ext0 = zext <2 x i32> %0 to <2 x i64>
|
||||
; %ext1 = zext <2 x i32> %1 to <2 x i64>
|
||||
define <2 x i64> @_mul2xi64toi64a(<2 x i64>, <2 x i64>) {
|
||||
; SSE2-LABEL: _mul2xi64toi64a:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
|
||||
; SSE2-NEXT: pand %xmm2, %xmm0
|
||||
; SSE2-NEXT: pand %xmm2, %xmm1
|
||||
; SSE2-NEXT: pmuludq %xmm1, %xmm0
|
||||
; SSE2-NEXT: retq
|
||||
;
|
||||
; SSE42-LABEL: _mul2xi64toi64a:
|
||||
; SSE42: # %bb.0:
|
||||
; SSE42-NEXT: pmuludq %xmm1, %xmm0
|
||||
; SSE42-NEXT: retq
|
||||
; SSE-LABEL: _mul2xi64toi64a:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: pmuludq %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: _mul2xi64toi64a:
|
||||
; AVX: # %bb.0:
|
||||
|
@@ -1736,7 +1736,7 @@ define <16 x i8> @insert_dup_mem_v16i8_i32(i32* %ptr) {
|
||||
define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
|
||||
; SSE2-LABEL: insert_dup_mem_v16i8_sext_i8:
|
||||
; SSE2: # %bb.0:
|
||||
; SSE2-NEXT: movsbl (%rdi), %eax
|
||||
; SSE2-NEXT: movzbl (%rdi), %eax
|
||||
; SSE2-NEXT: movd %eax, %xmm0
|
||||
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
|
||||
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
|
||||
@@ -1745,7 +1745,7 @@ define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
|
||||
;
|
||||
; SSSE3-LABEL: insert_dup_mem_v16i8_sext_i8:
|
||||
; SSSE3: # %bb.0:
|
||||
; SSSE3-NEXT: movsbl (%rdi), %eax
|
||||
; SSSE3-NEXT: movzbl (%rdi), %eax
|
||||
; SSSE3-NEXT: movd %eax, %xmm0
|
||||
; SSSE3-NEXT: pxor %xmm1, %xmm1
|
||||
; SSSE3-NEXT: pshufb %xmm1, %xmm0
|
||||
@@ -1753,7 +1753,7 @@ define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
|
||||
;
|
||||
; SSE41-LABEL: insert_dup_mem_v16i8_sext_i8:
|
||||
; SSE41: # %bb.0:
|
||||
; SSE41-NEXT: movsbl (%rdi), %eax
|
||||
; SSE41-NEXT: movzbl (%rdi), %eax
|
||||
; SSE41-NEXT: movd %eax, %xmm0
|
||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
||||
; SSE41-NEXT: pshufb %xmm1, %xmm0
|
||||
@@ -1761,7 +1761,7 @@ define <16 x i8> @insert_dup_mem_v16i8_sext_i8(i8* %ptr) {
|
||||
;
|
||||
; AVX1-LABEL: insert_dup_mem_v16i8_sext_i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movsbl (%rdi), %eax
|
||||
; AVX1-NEXT: movzbl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
|
@@ -2652,7 +2652,7 @@ define <8 x i16> @insert_dup_mem_v8i16_i32(i32* %ptr) {
|
||||
define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
|
||||
; SSE-LABEL: insert_dup_mem_v8i16_sext_i16:
|
||||
; SSE: # %bb.0:
|
||||
; SSE-NEXT: movswl (%rdi), %eax
|
||||
; SSE-NEXT: movzwl (%rdi), %eax
|
||||
; SSE-NEXT: movd %eax, %xmm0
|
||||
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
@@ -2660,7 +2660,7 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
|
||||
;
|
||||
; AVX1-LABEL: insert_dup_mem_v8i16_sext_i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movswl (%rdi), %eax
|
||||
; AVX1-NEXT: movzwl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
@@ -2668,14 +2668,14 @@ define <8 x i16> @insert_dup_mem_v8i16_sext_i16(i16* %ptr) {
|
||||
;
|
||||
; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: movswl (%rdi), %eax
|
||||
; AVX2-NEXT: movzwl (%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: movswl (%rdi), %eax
|
||||
; AVX512VL-NEXT: movzwl (%rdi), %eax
|
||||
; AVX512VL-NEXT: vpbroadcastw %eax, %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
%tmp = load i16, i16* %ptr, align 2
|
||||
|
@@ -4722,7 +4722,7 @@ define <16 x i16> @insert_dup_mem_v16i16_i32(i32* %ptr) {
|
||||
define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
|
||||
; AVX1-LABEL: insert_dup_mem_v16i16_sext_i16:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movswl (%rdi), %eax
|
||||
; AVX1-NEXT: movzwl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
|
||||
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||
@@ -4731,14 +4731,14 @@ define <16 x i16> @insert_dup_mem_v16i16_sext_i16(i16* %ptr) {
|
||||
;
|
||||
; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: movswl (%rdi), %eax
|
||||
; AVX2-NEXT: movzwl (%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: movswl (%rdi), %eax
|
||||
; AVX512VL-NEXT: movzwl (%rdi), %eax
|
||||
; AVX512VL-NEXT: vpbroadcastw %eax, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
%tmp = load i16, i16* %ptr, align 2
|
||||
|
@@ -3154,7 +3154,7 @@ define <32 x i8> @insert_dup_mem_v32i8_i32(i32* %ptr) {
|
||||
define <32 x i8> @insert_dup_mem_v32i8_sext_i8(i8* %ptr) {
|
||||
; AVX1-LABEL: insert_dup_mem_v32i8_sext_i8:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: movsbl (%rdi), %eax
|
||||
; AVX1-NEXT: movzbl (%rdi), %eax
|
||||
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
|
||||
|
@@ -233,7 +233,7 @@ define <32 x i16> @insert_dup_mem_v32i16_i32(i32* %ptr) {
|
||||
define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) {
|
||||
; KNL-LABEL: insert_dup_mem_v32i16_sext_i16:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: movswl (%rdi), %eax
|
||||
; KNL-NEXT: movzwl (%rdi), %eax
|
||||
; KNL-NEXT: vmovd %eax, %xmm0
|
||||
; KNL-NEXT: vpbroadcastw %xmm0, %ymm0
|
||||
; KNL-NEXT: vmovdqa %ymm0, %ymm1
|
||||
@@ -241,7 +241,7 @@ define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) {
|
||||
;
|
||||
; SKX-LABEL: insert_dup_mem_v32i16_sext_i16:
|
||||
; SKX: ## %bb.0:
|
||||
; SKX-NEXT: movswl (%rdi), %eax
|
||||
; SKX-NEXT: movzwl (%rdi), %eax
|
||||
; SKX-NEXT: vpbroadcastw %eax, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
%tmp = load i16, i16* %ptr, align 2
|
||||
|
Loading…
x
Reference in New Issue
Block a user