mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
The type-legalizer often scalarizes code. One of the common patterns is extract-and-truncate.
In this patch we optimize this pattern and convert the sequence into an extract op of a narrower type. This allows the BUILD_VECTOR DAG optimizations to construct efficient shuffle operations in many cases. llvm-svn: 149692
This commit is contained in:
parent
dee5f41723
commit
5c5681cf27
@ -4957,6 +4957,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
|
|||||||
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
|
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
|
||||||
SDValue N0 = N->getOperand(0);
|
SDValue N0 = N->getOperand(0);
|
||||||
EVT VT = N->getValueType(0);
|
EVT VT = N->getValueType(0);
|
||||||
|
bool isLE = TLI.isLittleEndian();
|
||||||
|
|
||||||
// noop truncate
|
// noop truncate
|
||||||
if (N0.getValueType() == N->getValueType(0))
|
if (N0.getValueType() == N->getValueType(0))
|
||||||
@ -4984,6 +4985,39 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
|
|||||||
return N0.getOperand(0);
|
return N0.getOperand(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fold Extract-and-trunc into a narrow extract:
|
||||||
|
// trunc(extract(x)) -> extract(bitcast(x))
|
||||||
|
// We only run this optimization after type legalization (which often
|
||||||
|
// creates this pattern) and before operation legalization after which
|
||||||
|
// we need to be more careful about the vector instructions that we generate.
|
||||||
|
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
|
||||||
|
LegalTypes && !LegalOperations && N0->hasOneUse()) {
|
||||||
|
|
||||||
|
EVT VecTy = N0.getOperand(0).getValueType();
|
||||||
|
EVT ExTy = N0.getValueType();
|
||||||
|
EVT TrTy = N->getValueType(0);
|
||||||
|
|
||||||
|
unsigned NumElem = VecTy.getVectorNumElements();
|
||||||
|
unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
|
||||||
|
|
||||||
|
EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
|
||||||
|
assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
|
||||||
|
|
||||||
|
SDValue EltNo = N0->getOperand(1);
|
||||||
|
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
|
||||||
|
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
|
||||||
|
|
||||||
|
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
|
||||||
|
|
||||||
|
SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
|
||||||
|
NVT, N0.getOperand(0));
|
||||||
|
|
||||||
|
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
|
||||||
|
N->getDebugLoc(), TrTy, V,
|
||||||
|
DAG.getConstant(Index, MVT::i32));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// See if we can simplify the input to this truncate through knowledge that
|
// See if we can simplify the input to this truncate through knowledge that
|
||||||
// only the low bits are being used.
|
// only the low bits are being used.
|
||||||
// For example "trunc (or (shl x, 8), y)" // -> trunc y
|
// For example "trunc (or (shl x, 8), y)" // -> trunc y
|
||||||
|
@ -2,8 +2,8 @@
|
|||||||
|
|
||||||
; Make sure that the conversion between v4i8 to v2i16 is not a simple bitcast.
|
; Make sure that the conversion between v4i8 to v2i16 is not a simple bitcast.
|
||||||
; CHECK: prom_bug
|
; CHECK: prom_bug
|
||||||
; CHECK: movd
|
|
||||||
; CHECK: shufb
|
; CHECK: shufb
|
||||||
|
; CHECK: movd
|
||||||
; CHECK: movw
|
; CHECK: movw
|
||||||
; CHECK: ret
|
; CHECK: ret
|
||||||
define void @prom_bug(<4 x i8> %t, i16* %p) {
|
define void @prom_bug(<4 x i8> %t, i16* %p) {
|
||||||
|
@ -109,3 +109,11 @@ define <4 x float> @test12(<4 x float>* %a) nounwind {
|
|||||||
%tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
%tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
||||||
ret <4 x float> %tmp1
|
ret <4 x float> %tmp1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
;CHECK: test13
|
||||||
|
;CHECK: shufd
|
||||||
|
;CHECK: ret
|
||||||
|
define <4 x i32> @test13(<2 x i32>%x) nounwind readnone {
|
||||||
|
%x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||||
|
ret <4 x i32>%x1
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user