1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

[LegalizeIntegerTypes] When promoting the result of an extract_vector_elt also promote the input type if necessary

By also promoting the input type we get a better idea for what scalar type to use. This can provide better results if the result of the extract is sign extended. What was previously happening is that the extract result would be legalized, sometime later the input of the sign extend would be legalized using the result of the extract. Then later the extract input would be legalized forcing a truncate into the input of the sign extend using a replace all uses. This requires DAG combine to combine out the sext/truncate pair. But sometimes we visited the truncate first and messed things up before the sext could be combined.

By creating the extract with the correct scalar type when we legalize the result type, the truncate will be added right away. Then, when the sign_extend input is legalized, it will create an any_extend of the truncate (which getNode can optimize, possibly removing the truncate), followed by a sign_extend_inreg. Now DAG combine doesn't have to worry about getting rid of the extend.

This fixes the regression on X86 in D56156.

Differential Revision: https://reviews.llvm.org/D56176

llvm-svn: 350236
This commit is contained in:
Craig Topper 2019-01-02 17:58:30 +00:00
parent 4a1bd67cd3
commit 38f8fb4fa1
2 changed files with 47 additions and 38 deletions

View File

@ -441,8 +441,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
/// Promote the integer result of an EXTRACT_VECTOR_ELT node.
///
/// NVT is the type the target reports N's result type should be transformed
/// to. If the vector operand is itself marked for integer promotion, the
/// already-promoted vector is fetched and, when its scalar type is at least as
/// wide as NVT, the element is extracted at that scalar type and then
/// any-extended or truncated to NVT. Otherwise the element is extracted
/// directly as NVT from the original operands.
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
// NOTE(review): this listing looks like a diff rendered without +/- markers
// (hunk header "-441,8 +441,26"). The return below appears to be the
// pre-change implementation that the commit removes; read literally it would
// make everything after it unreachable -- confirm against the real file.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0),
N->getOperand(1));
// Operand 0 is the vector being indexed, operand 1 is the element index.
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// If the input also needs to be promoted, do that first so we can get a
// good idea for the output type.
if (TLI.getTypeAction(*DAG.getContext(), Op0.getValueType())
== TargetLowering::TypePromoteInteger) {
SDValue In = GetPromotedInteger(Op0);
// If the new type is at least as large as NVT, use it. We probably won't
// need to promote it again.
EVT SVT = In.getValueType().getScalarType();
if (SVT.bitsGE(NVT)) {
// Extract at the promoted vector's scalar type, then adjust to NVT.
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, In, Op1);
return DAG.getAnyExtOrTrunc(Ext, dl, NVT);
}
}
// Fallback: extract from the original (unpromoted) vector directly as NVT.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, Op0, Op1);
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {

View File

@ -8,8 +8,8 @@ define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: pextrw $2, %xmm0, %eax
; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_eq_1:
@ -17,8 +17,7 @@ define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) {
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pextrb $4, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -32,15 +31,14 @@ define i32 @test_ne_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_ne_1:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pextrw $2, %xmm1, %eax
; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ne_1:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pextrb $4, %xmm1, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -69,8 +67,8 @@ define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: pextrw $2, %xmm0, %eax
; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ge_1:
@ -78,8 +76,7 @@ define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) {
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pextrb $4, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -93,15 +90,14 @@ define i32 @test_lt_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_lt_1:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pextrw $2, %xmm1, %eax
; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_lt_1:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm0, %xmm1
; SSE41-NEXT: pextrb $4, %xmm1, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %A, %B
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -130,8 +126,8 @@ define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: pextrw $2, %xmm1, %eax
; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_eq_2:
@ -139,8 +135,7 @@ define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) {
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: pextrb $4, %xmm1, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -154,15 +149,14 @@ define i32 @test_ne_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_ne_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pextrw $2, %xmm0, %eax
; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_ne_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pextrb $4, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -178,8 +172,8 @@ define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: pextrw $2, %xmm1, %eax
; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_le_2:
@ -187,8 +181,7 @@ define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) {
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
; SSE41-NEXT: pxor %xmm0, %xmm1
; SSE41-NEXT: pextrb $4, %xmm1, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: pextrd $1, %xmm1, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -215,15 +208,14 @@ define i32 @test_lt_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_lt_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pextrw $2, %xmm0, %eax
; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_lt_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pextrb $4, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>
@ -237,15 +229,14 @@ define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_gt_2:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pextrw $2, %xmm0, %eax
; SSE2-NEXT: movsbl %al, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_gt_2:
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: pextrb $4, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: retq
%cmp = icmp slt <4 x i32> %B, %A
%sext = sext <4 x i1> %cmp to <4 x i32>