1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 11:42:57 +01:00

Handle some more real-world cases of rlwimi. These don't come up that
regularly in "normal" code, but for things like software graphics, they
make a big difference.

For the following code:
/* Blend two packed 16-bit values field by field: srcA is weighted by alpha
 * and srcB by (32 - alpha), then the sum is scaled back down by 32.
 * NOTE(review): the masks suggest a 5-5-5 packed pixel and alpha in 0..32 --
 * confirm against the caller. */
unsigned short Trans16Bit(unsigned srcA,unsigned srcB,unsigned alpha)
{
	unsigned tmpA,tmpB,mixed;
	/* Spread the fields apart: bits 5-9 (0x03E0) move up 15 places to bits
	 * 20-24, while bits 0-4 and 10-14 (0x7C1F) stay where they are. This
	 * leaves headroom above each field for the multiply below. */
	tmpA = ((srcA & 0x03E0) << 15) | (srcA & 0x7C1F);
	tmpB = ((srcB & 0x03E0) << 15) | (srcB & 0x7C1F);
	/* Weighted sum of all fields at once; the two weights add up to 32. */
	mixed = (tmpA * alpha) + (tmpB * (32 - alpha));
	/* Shift right by 5 to undo the factor of 32, then re-pack: the fields
	 * kept in place are masked with 0x7C1F, and the field that was moved up
	 * is shifted back down (20 = 15 + 5) into bits 5-9. */
	return ((mixed >> 5) & 0x7C1F) | ((mixed >> 20) & 0x03E0);
}

We now generate:
_Trans16Bit:
.LBB_Trans16Bit_0:      ; entry
        andi. r2, r4, 31775
        rlwimi r2, r4, 15, 7, 11
        subfic r4, r5, 32
        mullw r2, r2, r4
        andi. r4, r3, 31775
        rlwimi r4, r3, 15, 7, 11
        mullw r3, r4, r5
        add r2, r2, r3
        srwi r3, r2, 5
        andi. r3, r3, 31775
        rlwimi r3, r2, 12, 22, 26
        blr

Instead of:
_Trans16Bit:
.LBB_Trans16Bit_0:      ; entry
        slwi r2, r4, 15
        rlwinm r2, r2, 0, 7, 11
        andi. r4, r4, 31775
        or r2, r2, r4
        subfic r4, r5, 32
        mullw r2, r2, r4
        slwi r4, r3, 15
        rlwinm r4, r4, 0, 7, 11
        andi. r3, r3, 31775
        or r3, r4, r3
        mullw r3, r3, r5
        add r2, r2, r3
        srwi r3, r2, 5
        andi. r3, r3, 31775
        srwi r2, r2, 20
        rlwimi r3, r2, 0, 22, 26
        blr

llvm-svn: 22201
This commit is contained in:
Nate Begeman 2005-06-08 04:14:27 +00:00
parent 023a21ea32
commit 47cd13d42a

View File

@ -982,8 +982,12 @@ void ISel::MoveCRtoGPR(unsigned CCReg, bool Inv, unsigned Idx, unsigned Result){
bool ISel::SelectBitfieldInsert(SDOperand OR, unsigned Result) { bool ISel::SelectBitfieldInsert(SDOperand OR, unsigned Result) {
bool IsRotate = false; bool IsRotate = false;
unsigned TgtMask = 0xFFFFFFFF, InsMask = 0xFFFFFFFF, Amount = 0; unsigned TgtMask = 0xFFFFFFFF, InsMask = 0xFFFFFFFF, Amount = 0;
unsigned Op0Opc = OR.getOperand(0).getOpcode();
unsigned Op1Opc = OR.getOperand(1).getOpcode(); SDOperand Op0 = OR.getOperand(0);
SDOperand Op1 = OR.getOperand(1);
unsigned Op0Opc = Op0.getOpcode();
unsigned Op1Opc = Op1.getOpcode();
// Verify that we have the correct opcodes // Verify that we have the correct opcodes
if (ISD::SHL != Op0Opc && ISD::SRL != Op0Opc && ISD::AND != Op0Opc) if (ISD::SHL != Op0Opc && ISD::SRL != Op0Opc && ISD::AND != Op0Opc)
@ -993,7 +997,7 @@ bool ISel::SelectBitfieldInsert(SDOperand OR, unsigned Result) {
// Generate Mask value for Target // Generate Mask value for Target
if (ConstantSDNode *CN = if (ConstantSDNode *CN =
dyn_cast<ConstantSDNode>(OR.getOperand(0).getOperand(1).Val)) { dyn_cast<ConstantSDNode>(Op0.getOperand(1).Val)) {
switch(Op0Opc) { switch(Op0Opc) {
case ISD::SHL: TgtMask <<= (unsigned)CN->getValue(); break; case ISD::SHL: TgtMask <<= (unsigned)CN->getValue(); break;
case ISD::SRL: TgtMask >>= (unsigned)CN->getValue(); break; case ISD::SRL: TgtMask >>= (unsigned)CN->getValue(); break;
@ -1005,7 +1009,7 @@ bool ISel::SelectBitfieldInsert(SDOperand OR, unsigned Result) {
// Generate Mask value for Insert // Generate Mask value for Insert
if (ConstantSDNode *CN = if (ConstantSDNode *CN =
dyn_cast<ConstantSDNode>(OR.getOperand(1).getOperand(1).Val)) { dyn_cast<ConstantSDNode>(Op1.getOperand(1).Val)) {
switch(Op1Opc) { switch(Op1Opc) {
case ISD::SHL: case ISD::SHL:
Amount = CN->getValue(); Amount = CN->getValue();
@ -1026,27 +1030,55 @@ bool ISel::SelectBitfieldInsert(SDOperand OR, unsigned Result) {
return false; return false;
} }
unsigned Tmp3 = 0;
// If both of the inputs are ANDs and one of them has a logical shift by
// constant as its input, make that the inserted value so that we can combine
// the shift into the rotate part of the rlwimi instruction
if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
if (Op1.getOperand(0).getOpcode() == ISD::SHL ||
Op1.getOperand(0).getOpcode() == ISD::SRL) {
if (ConstantSDNode *CN =
dyn_cast<ConstantSDNode>(Op1.getOperand(0).getOperand(1).Val)) {
Amount = Op1.getOperand(0).getOpcode() == ISD::SHL ?
CN->getValue() : 32 - CN->getValue();
Tmp3 = SelectExpr(Op1.getOperand(0).getOperand(0));
}
} else if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
Op0.getOperand(0).getOpcode() == ISD::SRL) {
if (ConstantSDNode *CN =
dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(1).Val)) {
std::swap(Op0, Op1);
std::swap(TgtMask, InsMask);
Amount = Op1.getOperand(0).getOpcode() == ISD::SHL ?
CN->getValue() : 32 - CN->getValue();
Tmp3 = SelectExpr(Op1.getOperand(0).getOperand(0));
}
}
}
// Verify that the Target mask and Insert mask together form a full word mask // Verify that the Target mask and Insert mask together form a full word mask
// and that the Insert mask is a run of set bits (which implies both are runs // and that the Insert mask is a run of set bits (which implies both are runs
// of set bits). Given that, Select the arguments and generate the rlwimi // of set bits). Given that, Select the arguments and generate the rlwimi
// instruction. // instruction.
unsigned MB, ME; unsigned MB, ME;
if (((TgtMask ^ InsMask) == 0xFFFFFFFF) && IsRunOfOnes(InsMask, MB, ME)) { if (((TgtMask & InsMask) == 0) && IsRunOfOnes(InsMask, MB, ME)) {
unsigned Tmp1, Tmp2; unsigned Tmp1, Tmp2;
bool fullMask = (TgtMask ^ InsMask) == 0xFFFFFFFF;
// Check for rotlwi / rotrwi here, a special case of bitfield insert // Check for rotlwi / rotrwi here, a special case of bitfield insert
// where both bitfield halves are sourced from the same value. // where both bitfield halves are sourced from the same value.
if (IsRotate && if (IsRotate && fullMask &&
OR.getOperand(0).getOperand(0) == OR.getOperand(1).getOperand(0)) { OR.getOperand(0).getOperand(0) == OR.getOperand(1).getOperand(0)) {
Tmp1 = SelectExpr(OR.getOperand(0).getOperand(0)); Tmp1 = SelectExpr(OR.getOperand(0).getOperand(0));
BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp1).addImm(Amount) BuildMI(BB, PPC::RLWINM, 4, Result).addReg(Tmp1).addImm(Amount)
.addImm(0).addImm(31); .addImm(0).addImm(31);
return true; return true;
} }
if (Op0Opc == ISD::AND) if (Op0Opc == ISD::AND && fullMask)
Tmp1 = SelectExpr(OR.getOperand(0).getOperand(0)); Tmp1 = SelectExpr(Op0.getOperand(0));
else else
Tmp1 = SelectExpr(OR.getOperand(0)); Tmp1 = SelectExpr(Op0);
Tmp2 = SelectExpr(OR.getOperand(1).getOperand(0)); Tmp2 = Tmp3 ? Tmp3 : SelectExpr(Op1.getOperand(0));
BuildMI(BB, PPC::RLWIMI, 5, Result).addReg(Tmp1).addReg(Tmp2) BuildMI(BB, PPC::RLWIMI, 5, Result).addReg(Tmp1).addReg(Tmp2)
.addImm(Amount).addImm(MB).addImm(ME); .addImm(Amount).addImm(MB).addImm(ME);
return true; return true;