mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
Enhance folding of (extract_subvec (insert_subvec V1, V2, IIdx), EIdx)
- Handle the case where the result of 'insert_subvec' is bitcast before 'extract_subvec'. This removes the redundant insertf128/extractf128 pair on unaligned 256-bit vector loads/stores of vectors with non-64-bit integer elements. llvm-svn: 177945
This commit is contained in:
parent
033062599e
commit
f4087706c4
@ -8990,33 +8990,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
|
||||
EVT NVT = N->getValueType(0);
|
||||
SDValue V = N->getOperand(0);
|
||||
|
||||
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
|
||||
// Handle only simple case where vector being inserted and vector
|
||||
// being extracted are of same type, and are half size of larger vectors.
|
||||
EVT BigVT = V->getOperand(0).getValueType();
|
||||
EVT SmallVT = V->getOperand(1).getValueType();
|
||||
if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
|
||||
return SDValue();
|
||||
|
||||
// Only handle cases where both indexes are constants with the same type.
|
||||
ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
|
||||
ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
|
||||
|
||||
if (InsIdx && ExtIdx &&
|
||||
InsIdx->getValueType(0).getSizeInBits() <= 64 &&
|
||||
ExtIdx->getValueType(0).getSizeInBits() <= 64) {
|
||||
// Combine:
|
||||
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
|
||||
// Into:
|
||||
// indices are equal => V1
|
||||
// otherwise => (extract_subvec V1, ExtIdx)
|
||||
if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
|
||||
return V->getOperand(1);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
|
||||
V->getOperand(0), N->getOperand(1));
|
||||
}
|
||||
}
|
||||
|
||||
if (V->getOpcode() == ISD::CONCAT_VECTORS) {
|
||||
// Combine:
|
||||
// (extract_subvec (concat V1, V2, ...), i)
|
||||
@ -9032,6 +9005,41 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
|
||||
return V->getOperand(Idx / NumElems);
|
||||
}
|
||||
|
||||
// Skip bitcasting
|
||||
if (V->getOpcode() == ISD::BITCAST)
|
||||
V = V.getOperand(0);
|
||||
|
||||
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
// Handle only simple case where vector being inserted and vector
|
||||
// being extracted are of same type, and are half size of larger vectors.
|
||||
EVT BigVT = V->getOperand(0).getValueType();
|
||||
EVT SmallVT = V->getOperand(1).getValueType();
|
||||
if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
|
||||
return SDValue();
|
||||
|
||||
// Only handle cases where both indexes are constants no wider than 64 bits.
|
||||
ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
|
||||
ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
|
||||
|
||||
if (InsIdx && ExtIdx &&
|
||||
InsIdx->getValueType(0).getSizeInBits() <= 64 &&
|
||||
ExtIdx->getValueType(0).getSizeInBits() <= 64) {
|
||||
// Combine:
|
||||
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
|
||||
// Into:
|
||||
// indices are equal or bit offsets are equal => V2 (bitcast to the result type)
|
||||
// otherwise => (extract_subvec V1, ExtIdx)
|
||||
if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
|
||||
ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
|
||||
return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
|
||||
DAG.getNode(ISD::BITCAST, dl,
|
||||
N->getOperand(0).getValueType(),
|
||||
V->getOperand(0)), N->getOperand(1));
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -114,3 +114,16 @@ cif_mixed_test_any_check: ; preds = %cif_mask_mixed
|
||||
unreachable
|
||||
}
|
||||
|
||||
; CHECK: add8i32
|
||||
; CHECK: vmovups
|
||||
; CHECK: vmovups
|
||||
; CHECK-NOT: vinsertf128
|
||||
; CHECK-NOT: vextractf128
|
||||
; CHECK: vmovups
|
||||
; CHECK: vmovups
|
||||
define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind {
|
||||
%b = load <8 x i32>* %bp, align 1
|
||||
%x = add <8 x i32> zeroinitializer, %b
|
||||
store <8 x i32> %x, <8 x i32>* %ret, align 1
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user