mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
Fix an optimization involving EXTRACT_SUBVECTOR in DAGCombine so it behaves correctly. PR11494.
llvm-svn: 145996
This commit is contained in:
parent
f1b60daf50
commit
5545db0906
@ -7181,19 +7181,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
|
||||
if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
|
||||
return SDValue();
|
||||
|
||||
// Combine:
|
||||
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
|
||||
// Into:
|
||||
// indicies are equal => V1
|
||||
// otherwise => (extract_subvec V1, ExtIdx)
|
||||
//
|
||||
SDValue InsIdx = N->getOperand(1);
|
||||
SDValue ExtIdx = V->getOperand(2);
|
||||
// Only handle cases where both indexes are constants no wider than 64 bits.
|
||||
ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
|
||||
ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
|
||||
|
||||
if (InsIdx == ExtIdx)
|
||||
return V->getOperand(1);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
|
||||
V->getOperand(0), N->getOperand(1));
|
||||
if (InsIdx && ExtIdx &&
|
||||
InsIdx->getValueType(0).getSizeInBits() <= 64 &&
|
||||
ExtIdx->getValueType(0).getSizeInBits() <= 64) {
|
||||
// Combine:
|
||||
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
|
||||
// Into:
|
||||
// indices are equal => V2
|
||||
// otherwise => (extract_subvec V1, ExtIdx)
|
||||
if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
|
||||
return V->getOperand(1);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
|
||||
V->getOperand(0), N->getOperand(1));
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
18
test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
Normal file
18
test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
Normal file
@ -0,0 +1,18 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
||||
; PR11494
|
||||
|
||||
; Regression test function for PR11494.
; %a is the pmaxsd of a constant vector and zero. %b places %a in the upper
; four lanes of an <8 x i32> (the lower four lanes are undef), and %c reads
; those same upper four lanes back, so the value stored to %p is %a itself.
; The FileCheck directives below expect vpxor, vpmaxsd, an unaligned vmovdqu
; store of %xmm0 to (%rdi), and ret on consecutive lines, i.e. the vpmaxsd
; result is stored directly with nothing in between.
; NOTE(review): presumably the two shuffles lower to an insert_subvector /
; extract_subvector pair that the DAG combine folds -- confirm against the
; DAGCombiner change this test accompanies.
define void @test(<4 x i32>* nocapture %p) nounwind {
|
||||
; CHECK: test:
|
||||
; CHECK: vpxor %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vpmaxsd {{.*}}, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovdqu %xmm0, (%rdi)
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> <i32 -8, i32 -9, i32 -10, i32 -11>, <4 x i32> zeroinitializer) nounwind
|
||||
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
|
||||
%c = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
store <4 x i32> %c, <4 x i32>* %p, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
|
Loading…
Reference in New Issue
Block a user