1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 11:42:57 +01:00

Fix an optimization involving EXTRACT_SUBVECTOR in DAGCombine so it behaves correctly. PR11494.

llvm-svn: 145996
This commit is contained in:
Eli Friedman 2011-12-07 00:11:56 +00:00
parent f1b60daf50
commit 5545db0906
2 changed files with 34 additions and 12 deletions

View File

@ -7181,19 +7181,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
return SDValue();
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
// indicies are equal => V1
// otherwise => (extract_subvec V1, ExtIdx)
//
SDValue InsIdx = N->getOperand(1);
SDValue ExtIdx = V->getOperand(2);
// Only handle cases where both indexes are constants that fit in 64 bits.
ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
if (InsIdx == ExtIdx)
return V->getOperand(1);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
V->getOperand(0), N->getOperand(1));
if (InsIdx && ExtIdx &&
InsIdx->getValueType(0).getSizeInBits() <= 64 &&
ExtIdx->getValueType(0).getSizeInBits() <= 64) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
// indices are equal => V2
// otherwise => (extract_subvec V1, ExtIdx)
if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
return V->getOperand(1);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
V->getOperand(0), N->getOperand(1));
}
}
return SDValue();

View File

@ -0,0 +1,18 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
; PR11494
; Regression test for PR11494. A <4 x i32> value is widened into the upper
; half of an <8 x i32> vector and that same upper half is then read back, so
; the value stored to %p must be exactly the pmaxsd result.
; NOTE(review): on AVX these two shuffles presumably become an
; insert_subvector / extract_subvector pair in the SelectionDAG, which is the
; pattern the DAGCombine fix targets -- confirm against the lowering code.
; %p: destination pointer for the <4 x i32> result.
define void @test(<4 x i32>* nocapture %p) nounwind {
; The directives below require, on consecutive lines: xmm0 zeroed with vpxor,
; a vpmaxsd into xmm0, an unaligned store of xmm0 to (%rdi), then ret.
; CHECK: test:
; CHECK: vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vpmaxsd {{.*}}, %xmm0, %xmm0
; CHECK-NEXT: vmovdqu %xmm0, (%rdi)
; CHECK-NEXT: ret
; pmaxsd intrinsic (packed signed dword max) of a constant vector and zero.
%a = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> <i32 -8, i32 -9, i32 -10, i32 -11>, <4 x i32> zeroinitializer) nounwind
; Widen to 8 lanes: lanes 0-3 are undef, lanes 4-7 take elements 0-3 of %a.
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
; Read back lanes 4-7 of %b, i.e. exactly the lanes that were filled from %a.
%c = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; align 1 store, matching the unaligned vmovdqu expected above.
store <4 x i32> %c, <4 x i32>* %p, align 1
ret void
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone