1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-25 05:52:53 +02:00

[X86] Reduce the math needed for index calculation when inserting and extracting subvectors and elements by exploiting the fact that all supported vector types have a power-of-2 number of elements.

llvm-svn: 251740
This commit is contained in:
Craig Topper 2015-10-31 17:27:52 +00:00
parent a6fa79d960
commit e9bbc5ba7a

View File

@ -4373,19 +4373,18 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
// Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
// we want.
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
* ElemsPerChunk);
// we want. Since ElemsPerChunk is a power of 2, we just need to clear the low bits.
IdxVal &= ~(ElemsPerChunk - 1);
// If the input is a buildvector just emit a smaller one.
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
ElemsPerChunk));
makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
}
@ -4423,13 +4422,13 @@ static SDValue InsertSubVector(SDValue Result, SDValue Vec,
// Insert the relevant vectorWidth bits.
unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
// we want.
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
* ElemsPerChunk);
// we want. Since ElemsPerChunk is a power of 2, we just need to clear the low bits.
IdxVal &= ~(ElemsPerChunk - 1);
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
}
@ -11390,10 +11389,11 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT EltVT = VecVT.getVectorElementType();
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
//if (IdxVal >= NumElems/2)
// IdxVal -= NumElems/2;
IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk;
// Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2
// this can be done with a mask.
IdxVal &= ElemsPerChunk - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
DAG.getConstant(IdxVal, dl, MVT::i32));
}
@ -11529,7 +11529,9 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// Insert the element into the desired chunk.
unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128;
assert(isPowerOf2_32(NumEltsIn128));
// Since NumEltsIn128 is a power of 2 we can use mask instead of modulo.
unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
DAG.getConstant(IdxIn128, dl, MVT::i32));