mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[X86] When lowering extending loads from v2i1/v4i1, if we have VLX, use a narrower extend.
Previously we used an extend from v8i1 to v8i32/v8i64 and then extracted to the final width. But if we have VLX, we should extract first, so that we don't end up with an overly large extend. This allows us to use vpcmpeq to make all ones for the sign extend when DQI isn't available; otherwise we get a VPTERNLOG. If we make v2i1/v4i1 legal as proposed in D41560, we could always do this and rely on the lowering of the extend to widen when necessary. llvm-svn: 321538
This commit is contained in:
parent
bc8b79278a
commit
750df90652
@ -18679,6 +18679,14 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
|
||||
// Replace chain users with the new chain.
|
||||
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
|
||||
if (Subtarget.hasVLX()) {
|
||||
// Extract to v4i1/v2i1.
|
||||
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Load,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
// Finally, do a normal sign-extend to the desired register.
|
||||
return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract);
|
||||
}
|
||||
|
||||
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
|
||||
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load);
|
||||
|
||||
@ -18698,22 +18706,25 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
|
||||
|
||||
if (NumElts <= 8) {
|
||||
// A subset, assume that we have only AVX-512F
|
||||
unsigned NumBitsToLoad = 8;
|
||||
MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad);
|
||||
SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(),
|
||||
SDValue Load = DAG.getLoad(MVT::i8, dl, Ld->getChain(),
|
||||
Ld->getBasePtr(),
|
||||
Ld->getMemOperand());
|
||||
// Replace chain users with the new chain.
|
||||
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
|
||||
|
||||
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad);
|
||||
SDValue BitVec = DAG.getBitcast(MaskVT, Load);
|
||||
SDValue BitVec = DAG.getBitcast(MVT::v8i1, Load);
|
||||
|
||||
if (NumElts == 8)
|
||||
return DAG.getNode(ExtOpcode, dl, VT, BitVec);
|
||||
|
||||
// we should take care to v4i1 and v2i1
|
||||
if (Subtarget.hasVLX()) {
|
||||
// Extract to v4i1/v2i1.
|
||||
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, BitVec,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
// Finally, do a normal sign-extend to the desired register.
|
||||
return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract);
|
||||
}
|
||||
|
||||
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
|
||||
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec);
|
||||
|
@ -48,9 +48,8 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {
|
||||
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
||||
; AVX512-NEXT: kmovd %eax, %k1
|
||||
; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%1 = bitcast i2 %a0 to <2 x i1>
|
||||
%2 = sext <2 x i1> %1 to <2 x i64>
|
||||
@ -91,10 +90,8 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) {
|
||||
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
||||
; AVX512-NEXT: kmovd %eax, %k1
|
||||
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%1 = bitcast i4 %a0 to <4 x i1>
|
||||
%2 = sext <4 x i1> %1 to <4 x i32>
|
||||
@ -246,8 +243,8 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
|
||||
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
||||
; AVX512-NEXT: kmovd %eax, %k1
|
||||
; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
|
||||
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%1 = bitcast i4 %a0 to <4 x i1>
|
||||
%2 = sext <4 x i1> %1 to <4 x i64>
|
||||
|
@ -63,9 +63,7 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {
|
||||
; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
||||
; AVX512VLBW-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; AVX512VLBW-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
|
||||
; AVX512VLBW-NEXT: vzeroupper
|
||||
; AVX512VLBW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
%1 = bitcast i2 %a0 to <2 x i1>
|
||||
%2 = zext <2 x i1> %1 to <2 x i64>
|
||||
@ -120,9 +118,7 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) {
|
||||
; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
||||
; AVX512VLBW-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
|
||||
; AVX512VLBW-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
|
||||
; AVX512VLBW-NEXT: vzeroupper
|
||||
; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
%1 = bitcast i4 %a0 to <4 x i1>
|
||||
%2 = zext <4 x i1> %1 to <4 x i32>
|
||||
@ -317,8 +313,7 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
|
||||
; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
||||
; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
||||
; AVX512VLBW-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
|
||||
; AVX512VLBW-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
|
||||
; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k1} {z}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
%1 = bitcast i4 %a0 to <4 x i1>
|
||||
%2 = zext <4 x i1> %1 to <4 x i64>
|
||||
|
@ -46,9 +46,8 @@ define <2 x i1> @bitcast_i2_2i1(i2 zeroext %a0) {
|
||||
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
||||
; AVX512-NEXT: kmovd %eax, %k1
|
||||
; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
|
||||
; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%1 = bitcast i2 %a0 to <2 x i1>
|
||||
ret <2 x i1> %1
|
||||
@ -90,10 +89,8 @@ define <4 x i1> @bitcast_i4_4i1(i4 zeroext %a0) {
|
||||
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
|
||||
; AVX512-NEXT: kmovd %eax, %k1
|
||||
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
|
||||
; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: retq
|
||||
%1 = bitcast i4 %a0 to <4 x i1>
|
||||
ret <4 x i1> %1
|
||||
|
Loading…
Reference in New Issue
Block a user