mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
Revert "[PowerPC] Try to simplify a Swap if it feeds a Splat"
Revert r316478. A test case has failed. Will recommit this change once we find and fix the failure. This reverts commit 7c330fabaedaba3d02c58bc3cc1198896c895f34. llvm-svn: 316952
This commit is contained in:
parent
0ad57a67a0
commit
f59ef1bafe
@ -375,53 +375,6 @@ bool PPCMIPeephole::simplifyCode(void) {
|
||||
MI.getOperand(2).setImm(NewElem);
|
||||
}
|
||||
}
|
||||
|
||||
// Splat is fed by a SWAP which is a permute of this form
|
||||
// XXPERMDI %VA, %VA, 2
|
||||
// Since the splat instruction can use any of the vector elements to do
|
||||
// the splat we do not have to rearrange the elements in the vector
|
||||
// with a swap before we do the splat. We can simply do the splat from
|
||||
// a different index.
|
||||
// If the swap has only one use (the splat) then we can completely
|
||||
// remove the swap too.
|
||||
if (DefOpcode == PPC::XXPERMDI && MI.getOperand(1).isImm()) {
|
||||
unsigned SwapRes = DefMI->getOperand(0).getReg();
|
||||
unsigned SwapOp1 = DefMI->getOperand(1).getReg();
|
||||
unsigned SwapOp2 = DefMI->getOperand(2).getReg();
|
||||
unsigned SwapImm = DefMI->getOperand(3).getImm();
|
||||
unsigned SplatImm = MI.getOperand(1).getImm();
|
||||
|
||||
// Break if this permute is not a swap.
|
||||
if (SwapOp1 != SwapOp2 || SwapImm != 2)
|
||||
break;
|
||||
|
||||
unsigned NewElem = 0;
|
||||
// Compute the new index to use for the splat.
|
||||
if (MI.getOpcode() == PPC::VSPLTB)
|
||||
NewElem = (SplatImm + 8) & 0xF;
|
||||
else if (MI.getOpcode() == PPC::VSPLTH)
|
||||
NewElem = (SplatImm + 4) & 0x7;
|
||||
else if (MI.getOpcode() == PPC::XXSPLTW)
|
||||
NewElem = (SplatImm + 2) & 0x3;
|
||||
else {
|
||||
DEBUG(dbgs() << "Unknown splat opcode.");
|
||||
DEBUG(MI.dump());
|
||||
break;
|
||||
}
|
||||
|
||||
if (MRI->hasOneNonDBGUse(SwapRes)) {
|
||||
DEBUG(dbgs() << "Removing redundant swap: ");
|
||||
DEBUG(DefMI->dump());
|
||||
ToErase = DefMI;
|
||||
}
|
||||
Simplified = true;
|
||||
DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
|
||||
" to " << NewElem << " in instruction: ");
|
||||
DEBUG(MI.dump());
|
||||
MI.getOperand(1).setImm(NewElem);
|
||||
MI.getOperand(2).setReg(SwapOp1);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case PPC::XVCVDPSP: {
|
||||
|
@ -16,7 +16,7 @@ entry:
|
||||
; CHECK: sldi [[REG1:[0-9]+]], 3, 56
|
||||
; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
|
||||
; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
|
||||
; CHECK-LE: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
|
||||
; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
@ -28,7 +28,7 @@ entry:
|
||||
; CHECK: sldi [[REG1:[0-9]+]], 3, 48
|
||||
; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
|
||||
; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
|
||||
; CHECK-LE: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
|
||||
; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
|
||||
}
|
||||
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
|
@ -1,134 +0,0 @@
|
||||
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-PWR8
|
||||
|
||||
; The straightforward expansion of this code will result in a swap followed by a
|
||||
; splat. However, the swap is not needed since in this case the splat is the
|
||||
; only use.
|
||||
; We want to check that we are not using the swap and that we have indexed the
|
||||
; splat to the correct location.
|
||||
; 8 Bit Signed Version of the test.
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
define <16 x i8> @splat_8_plus(<16 x i8> %v, i8 signext %c) local_unnamed_addr {
|
||||
entry:
|
||||
%splat.splatinsert.i = insertelement <16 x i8> undef, i8 %c, i32 0
|
||||
%splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
%add = add <16 x i8> %splat.splat.i, %v
|
||||
ret <16 x i8> %add
|
||||
; CHECK-LABEL: splat_8_plus
|
||||
; CHECK-NOT: xxswapd
|
||||
; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
|
||||
; CHECK: blr
|
||||
; CHECK-PWR8-LABEL: splat_8_plus
|
||||
; CHECK-PWR8-NOT: xxswapd
|
||||
; CHECK-PWR8: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
|
||||
; CHECK-PWR8: blr
|
||||
}
|
||||
|
||||
; 8 Bit Unsigned Version of the test.
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
define <16 x i8> @splat_u8_plus(<16 x i8> %v, i8 zeroext %c) local_unnamed_addr {
|
||||
entry:
|
||||
%splat.splatinsert.i = insertelement <16 x i8> undef, i8 %c, i32 0
|
||||
%splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> undef, <16 x i32> zeroinitializer
|
||||
%add = add <16 x i8> %splat.splat.i, %v
|
||||
ret <16 x i8> %add
|
||||
; CHECK-LABEL: splat_u8_plus
|
||||
; CHECK-NOT: xxswapd
|
||||
; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
|
||||
; CHECK: blr
|
||||
; CHECK-PWR8-LABEL: splat_u8_plus
|
||||
; CHECK-PWR8-NOT: xxswapd
|
||||
; CHECK-PWR8: vspltb {{[0-9]+}}, {{[0-9]+}}, 7
|
||||
; CHECK-PWR8: blr
|
||||
}
|
||||
|
||||
; 16 Bit Signed Version of the test.
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
define <8 x i16> @splat_16_plus(<8 x i16> %v, i16 signext %c) local_unnamed_addr {
|
||||
entry:
|
||||
%0 = shl i16 %c, 8
|
||||
%conv.i = ashr exact i16 %0, 8
|
||||
%splat.splatinsert.i = insertelement <8 x i16> undef, i16 %conv.i, i32 0
|
||||
%splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%add = add <8 x i16> %splat.splat.i, %v
|
||||
ret <8 x i16> %add
|
||||
; CHECK-LABEL: splat_16_plus
|
||||
; CHECK-NOT: xxswapd
|
||||
; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
|
||||
; CHECK: blr
|
||||
; CHECK-PWR8-LABEL: splat_16_plus
|
||||
; CHECK-PWR8-NOT: xxswapd
|
||||
; CHECK-PWR8: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
|
||||
; CHECK-PWR8: blr
|
||||
}
|
||||
|
||||
; 16 Bit Unsigned Version of the test.
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
define <8 x i16> @splat_u16_plus(<8 x i16> %v, i16 zeroext %c) local_unnamed_addr {
|
||||
entry:
|
||||
%0 = shl i16 %c, 8
|
||||
%conv.i = ashr exact i16 %0, 8
|
||||
%splat.splatinsert.i = insertelement <8 x i16> undef, i16 %conv.i, i32 0
|
||||
%splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> undef, <8 x i32> zeroinitializer
|
||||
%add = add <8 x i16> %splat.splat.i, %v
|
||||
ret <8 x i16> %add
|
||||
; CHECK-LABEL: splat_u16_plus
|
||||
; CHECK-NOT: xxswapd
|
||||
; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
|
||||
; CHECK: blr
|
||||
; CHECK-PWR8-LABEL: splat_u16_plus
|
||||
; CHECK-PWR8-NOT: xxswapd
|
||||
; CHECK-PWR8: vsplth {{[0-9]+}}, {{[0-9]+}}, 3
|
||||
; CHECK-PWR8: blr
|
||||
}
|
||||
|
||||
; 32 Bit Signed Version of the test.
|
||||
; The 32 bit examples work differently than the 8 and 16 bit versions of the
|
||||
; test. On Power 9 we have the mtvsrws instruction that does both the move to
|
||||
; register and the splat so it does not really test the newly implemented code.
|
||||
; On Power 9 for the 32 bit case we don't need the new simplification. It is
|
||||
; just here for completeness.
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
define <4 x i32> @splat_32_plus(<4 x i32> %v, i32 signext %c) local_unnamed_addr {
|
||||
entry:
|
||||
%sext = shl i32 %c, 24
|
||||
%conv.i = ashr exact i32 %sext, 24
|
||||
%splat.splatinsert.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
|
||||
%splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%add = add <4 x i32> %splat.splat.i, %v
|
||||
ret <4 x i32> %add
|
||||
; CHECK-LABEL: splat_32_plus
|
||||
; CHECK-NOT: xxswapd
|
||||
; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: blr
|
||||
; CHECK-PWR8-LABEL: splat_32_plus
|
||||
; CHECK-PWR8-NOT: xxswapd
|
||||
; CHECK-PWR8: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1
|
||||
; CHECK-PWR8: blr
|
||||
}
|
||||
|
||||
; 32 Bit Unsigned Version of the test.
|
||||
; The 32 bit examples work differently than the 8 and 16 bit versions of the
|
||||
; test. On Power 9 we have the mtvsrws instruction that does both the move to
|
||||
; register and the splat so it does not really test the newly implemented code.
|
||||
; On Power 9 for the 32 bit case we don't need the new simplification. It is
|
||||
; just here for completeness.
|
||||
; Function Attrs: norecurse nounwind readnone
|
||||
define <4 x i32> @splat_u32_plus(<4 x i32> %v, i32 zeroext %c) local_unnamed_addr {
|
||||
entry:
|
||||
%sext = shl i32 %c, 24
|
||||
%conv.i = ashr exact i32 %sext, 24
|
||||
%splat.splatinsert.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
|
||||
%splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
%add = add <4 x i32> %splat.splat.i, %v
|
||||
ret <4 x i32> %add
|
||||
; CHECK-LABEL: splat_u32_plus
|
||||
; CHECK-NOT: xxswapd
|
||||
; CHECK: mtvsrws {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK: blr
|
||||
; CHECK-PWR8-LABEL: splat_u32_plus
|
||||
; CHECK-PWR8-NOT: xxswapd
|
||||
; CHECK-PWR8: xxspltw {{[0-9]+}}, {{[0-9]+}}, 1
|
||||
; CHECK-PWR8: blr
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user