mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
Fold more shifts into inserts, and update the README
llvm-svn: 28168
This commit is contained in:
parent
bbe4393bc4
commit
a706539a72
@ -392,25 +392,25 @@ static bool isIntImmediate(SDOperand N, unsigned& Imm) {
|
|||||||
/// SelectBitfieldInsert - turn an or of two masked values into
|
/// SelectBitfieldInsert - turn an or of two masked values into
|
||||||
/// the rotate left word immediate then mask insert (rlwimi) instruction.
|
/// the rotate left word immediate then mask insert (rlwimi) instruction.
|
||||||
SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
|
SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
|
||||||
unsigned TgtMask = 0xFFFFFFFF, InsMask = 0xFFFFFFFF, SH = 0;
|
|
||||||
unsigned Value;
|
|
||||||
|
|
||||||
SDOperand Op0 = N->getOperand(0);
|
SDOperand Op0 = N->getOperand(0);
|
||||||
SDOperand Op1 = N->getOperand(1);
|
SDOperand Op1 = N->getOperand(1);
|
||||||
|
|
||||||
unsigned Op0Opc = Op0.getOpcode();
|
|
||||||
unsigned Op1Opc = Op1.getOpcode();
|
|
||||||
|
|
||||||
uint64_t LKZ, LKO, RKZ, RKO;
|
uint64_t LKZ, LKO, RKZ, RKO;
|
||||||
TLI.ComputeMaskedBits(Op0, TgtMask, LKZ, LKO);
|
TLI.ComputeMaskedBits(Op0, 0xFFFFFFFFULL, LKZ, LKO);
|
||||||
TLI.ComputeMaskedBits(Op1, TgtMask, RKZ, RKO);
|
TLI.ComputeMaskedBits(Op1, 0xFFFFFFFFULL, RKZ, RKO);
|
||||||
|
|
||||||
if ((LKZ | RKZ) == 0x00000000FFFFFFFFULL) {
|
unsigned TargetMask = LKZ;
|
||||||
unsigned PInsMask = ~RKZ;
|
unsigned InsertMask = RKZ;
|
||||||
unsigned PTgtMask = ~LKZ;
|
|
||||||
|
|
||||||
// If the LHS has a foldable shift, then swap it to the RHS so that we can
|
if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
|
||||||
// fold the shift into the insert.
|
unsigned Op0Opc = Op0.getOpcode();
|
||||||
|
unsigned Op1Opc = Op1.getOpcode();
|
||||||
|
unsigned Value, SH = 0;
|
||||||
|
TargetMask = ~TargetMask;
|
||||||
|
InsertMask = ~InsertMask;
|
||||||
|
|
||||||
|
// If the LHS has a foldable shift and the RHS does not, then swap it to the
|
||||||
|
// RHS so that we can fold the shift into the insert.
|
||||||
if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
|
if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
|
||||||
if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
|
if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
|
||||||
Op0.getOperand(0).getOpcode() == ISD::SRL) {
|
Op0.getOperand(0).getOpcode() == ISD::SRL) {
|
||||||
@ -418,15 +418,22 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
|
|||||||
Op1.getOperand(0).getOpcode() != ISD::SRL) {
|
Op1.getOperand(0).getOpcode() != ISD::SRL) {
|
||||||
std::swap(Op0, Op1);
|
std::swap(Op0, Op1);
|
||||||
std::swap(Op0Opc, Op1Opc);
|
std::swap(Op0Opc, Op1Opc);
|
||||||
std::swap(PInsMask, PTgtMask);
|
std::swap(TargetMask, InsertMask);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
|
||||||
|
if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
|
||||||
|
Op1.getOperand(0).getOpcode() != ISD::SRL) {
|
||||||
|
std::swap(Op0, Op1);
|
||||||
|
std::swap(Op0Opc, Op1Opc);
|
||||||
|
std::swap(TargetMask, InsertMask);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned MB, ME;
|
unsigned MB, ME;
|
||||||
if (isRunOfOnes(PInsMask, MB, ME)) {
|
if (isRunOfOnes(InsertMask, MB, ME)) {
|
||||||
SDOperand Tmp1, Tmp2, Tmp3;
|
SDOperand Tmp1, Tmp2, Tmp3;
|
||||||
bool DisjointMask = (PTgtMask ^ PInsMask) == 0xFFFFFFFF;
|
bool DisjointMask = (TargetMask ^ InsertMask) == 0xFFFFFFFF;
|
||||||
|
|
||||||
if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
|
if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
|
||||||
isIntImmediate(Op1.getOperand(1), Value)) {
|
isIntImmediate(Op1.getOperand(1), Value)) {
|
||||||
|
@ -516,10 +516,17 @@ _foo:
|
|||||||
srwi r4, r2, 30
|
srwi r4, r2, 30
|
||||||
srwi r5, r2, 31
|
srwi r5, r2, 31
|
||||||
or r4, r4, r5
|
or r4, r4, r5
|
||||||
slwi r4, r4, 31
|
rlwimi r2, r4, 31, 0, 0
|
||||||
rlwimi r4, r2, 0, 1, 31
|
stw r2, 0(r3)
|
||||||
stw r4, 0(r3)
|
|
||||||
blr
|
blr
|
||||||
|
|
||||||
I *think* that could use another rlwimi.
|
What this code is really doing is ORing bit 0 with bit 1 (PowerPC bit numbering, where bit 0 is the most significant bit). We could codegen this
|
||||||
|
as:
|
||||||
|
|
||||||
|
_foo:
|
||||||
|
lwz r2, 0(r3)
|
||||||
|
slwi r4, r2, 1
|
||||||
|
rlwinm r4, r4, 0, 0, 0
|
||||||
|
or r2, r2, r4
|
||||||
|
stw r2, 0(r3)
|
||||||
|
blr
|
||||||
|
Loading…
Reference in New Issue
Block a user