1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[X86][SSE] Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0)) -> EXTEND_VECTOR_INREG(X)

This commit is contained in:
Simon Pilgrim 2020-09-20 18:38:54 +01:00
parent 19d99cd594
commit 228d97b26d
2 changed files with 36 additions and 10 deletions

View File

@ -6184,6 +6184,22 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
return DAG.getBitcast(VT, Vec);
}
// Map an extension opcode onto its plain *_EXTEND form.
// Each *_EXTEND_VECTOR_INREG opcode yields the matching *_EXTEND opcode, and
// a plain *_EXTEND opcode is returned unchanged. Any other opcode is treated
// as unreachable.
static unsigned getOpcode_EXTEND(unsigned Opcode) {
  if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
    return ISD::ANY_EXTEND;

  if (Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
    return ISD::ZERO_EXTEND;

  if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
    return ISD::SIGN_EXTEND;

  // Callers must only pass one of the six extension opcodes handled above.
  llvm_unreachable("Unknown opcode");
}
// Convert *_EXTEND to *_EXTEND_VECTOR_INREG opcode.
static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) {
switch (Opcode) {
@ -49258,6 +49274,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDValue In = N->getOperand(0);
unsigned Opcode = N->getOpcode();
unsigned InOpcode = In.getOpcode();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Try to merge vector loads and extend_inreg to an extload.
@ -49283,9 +49300,18 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
}
// Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X).
if (Opcode == In.getOpcode())
if (Opcode == InOpcode)
return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0));
// Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0))
// -> EXTEND_VECTOR_INREG(X).
// TODO: Handle non-zero subvector indices.
if (InOpcode == ISD::EXTRACT_SUBVECTOR && In.getConstantOperandVal(1) == 0 &&
In.getOperand(0).getOpcode() == getOpcode_EXTEND(Opcode) &&
In.getOperand(0).getOperand(0).getValueSizeInBits() ==
In.getValueSizeInBits())
return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0).getOperand(0));
// Attempt to combine as a shuffle.
// TODO: General ZERO_EXTEND_VECTOR_INREG support.
if (Opcode == ISD::ANY_EXTEND_VECTOR_INREG ||

View File

@ -976,18 +976,18 @@ define void @zext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind "min-legal
define void @sext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind "min-legal-vector-width"="256" {
; CHECK-LABEL: sext_v16i8_v16i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; CHECK-NEXT: vpmovsxwq %xmm1, %ymm1
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm2
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
; CHECK-NEXT: vpmovsxwq %xmm3, %ymm3
; CHECK-NEXT: vpmovsxwq %xmm0, %ymm0
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm1
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; CHECK-NEXT: vpmovsxwq %xmm2, %ymm2
; CHECK-NEXT: vmovdqa %ymm2, 64(%rdi)
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
; CHECK-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
; CHECK-NEXT: vpmovsxwq %xmm3, %ymm3
; CHECK-NEXT: vpmovsxwq %xmm1, %ymm1
; CHECK-NEXT: vpmovsxbq %xmm0, %ymm0
; CHECK-NEXT: vmovdqa %ymm0, (%rdi)
; CHECK-NEXT: vmovdqa %ymm1, 64(%rdi)
; CHECK-NEXT: vmovdqa %ymm3, 96(%rdi)
; CHECK-NEXT: vmovdqa %ymm1, 32(%rdi)
; CHECK-NEXT: vmovdqa %ymm2, 32(%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%a = sext <16 x i8> %x to <16 x i64>