1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[PowerPC] Partially enable the ISEL expansion pass.

The pass to expand ISEL instructions into if-then-else sequences in patch D23630
is currently disabled. This patch partially enables it by always removing the
unnecessary ISELs (all registers used by the ISELs are the same one) and folding
the ISELs which have the same input registers into unconditional copies.

Differential Revision: https://reviews.llvm.org/D40497

llvm-svn: 320414
This commit is contained in:
Tony Jiang 2017-12-11 20:42:37 +00:00
parent 743ca2d884
commit 297ca0894d
6 changed files with 293 additions and 95 deletions

View File

@ -59,6 +59,8 @@ class PPCExpandISEL : public MachineFunctionPass {
typedef SmallDenseMap<int, BlockISELList> ISELInstructionList;
// A map of MBB numbers to their lists of contained ISEL instructions.
// Please note when we traverse this list and expand ISEL, we only remove
// the ISEL from the MBB not from this list.
ISELInstructionList ISELInstructions;
/// Initialize the object.
@ -124,9 +126,6 @@ public:
#endif
bool runOnMachineFunction(MachineFunction &MF) override {
if (!isExpandISELEnabled(MF))
return false;
DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n");
initialize(MF);
@ -190,30 +189,71 @@ bool PPCExpandISEL::canMerge(MachineInstr *PrevPushedMI, MachineInstr *MI) {
}
void PPCExpandISEL::expandAndMergeISELs() {
for (auto &BlockList : ISELInstructions) {
bool ExpandISELEnabled = isExpandISELEnabled(*MF);
DEBUG(dbgs() << printMBBReference(*MF->getBlockNumbered(BlockList.first))
<< ":\n");
for (auto &BlockList : ISELInstructions) {
DEBUG(dbgs() << "Expanding ISEL instructions in "
<< printMBBReference(*MF->getBlockNumbered(BlockList.first))
<< "\n");
BlockISELList &CurrentISELList = BlockList.second;
auto I = CurrentISELList.begin();
auto E = CurrentISELList.end();
while (I != E) {
BlockISELList SubISELList;
assert(isISEL(**I) && "Expecting an ISEL instruction");
MachineOperand &Dest = (*I)->getOperand(0);
MachineOperand &TrueValue = (*I)->getOperand(1);
MachineOperand &FalseValue = (*I)->getOperand(2);
SubISELList.push_back(*I++);
// Collect the ISELs that can be merged together.
while (I != E && canMerge(SubISELList.back(), *I))
// Special case 1, all registers used by ISEL are the same one.
// The non-redundant isel 0, 0, 0, N would not satisfy these conditions
// as it would be ISEL %R0, %ZERO, %R0, %CRN.
if (useSameRegister(Dest, TrueValue) &&
useSameRegister(Dest, FalseValue)) {
DEBUG(dbgs() << "Remove redudant ISEL instruction: " << **I << "\n");
// FIXME: if the CR field used has no other uses, we could eliminate the
// instruction that defines it. This would have to be done manually
// since this pass runs too late to run DCE after it.
NumRemoved++;
(*I)->eraseFromParent();
I++;
} else if (useSameRegister(TrueValue, FalseValue)) {
// Special case 2, the two input registers used by ISEL are the same.
// Note: the non-foldable isel RX, 0, 0, N would not satisfy this
// condition as it would be ISEL %RX, %ZERO, %R0, %CRN, which makes it
// safe to fold ISEL to MR(OR) instead of ADDI.
MachineBasicBlock *MBB = (*I)->getParent();
DEBUG(dbgs() << "Fold the ISEL instruction to an unconditonal copy:\n");
DEBUG(dbgs() << "ISEL: " << **I << "\n");
NumFolded++;
// Note: we're using both the TrueValue and FalseValue operands so as
// not to lose the kill flag if it is set on either of them.
BuildMI(*MBB, (*I), dl, TII->get(isISEL8(**I) ? PPC::OR8 : PPC::OR))
.add(Dest)
.add(TrueValue)
.add(FalseValue);
(*I)->eraseFromParent();
I++;
} else if (ExpandISELEnabled) { // Normal cases expansion enabled
DEBUG(dbgs() << "Expand ISEL instructions:\n");
DEBUG(dbgs() << "ISEL: " << **I << "\n");
BlockISELList SubISELList;
SubISELList.push_back(*I++);
// Collect the ISELs that can be merged together.
// This will eat up ISEL instructions without considering whether they
// may be redundant or foldable to a register copy. So we still keep
// the handleSpecialCases() downstream to handle them.
while (I != E && canMerge(SubISELList.back(), *I)) {
DEBUG(dbgs() << "ISEL: " << **I << "\n");
SubISELList.push_back(*I++);
}
expandMergeableISELs(SubISELList);
}
}
expandMergeableISELs(SubISELList);
} else { // Normal cases expansion disabled
I++; // leave the ISEL as it is
}
} // end while
} // end for
}
void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL,
@ -236,13 +276,15 @@ void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL,
// Similarly, if at least one of the ISEL instructions satisfy the
// following condition, we need the False Block:
// The Dest Register and False Value Register are not the same.
bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue);
bool IsORIInstRequired = !useSameRegister(Dest, FalseValue);
// Special case 1, all registers used by ISEL are the same one.
if (!IsADDIInstRequired && !IsORIInstRequired) {
DEBUG(dbgs() << "Remove redudant ISEL instruction.");
// FIXME: if the CR field used has no other uses, we could eliminate the
// instruction that defines it. This would have to be done manually
// since this pass runs too late to run DCE after it.
NumRemoved++;
(*MI)->eraseFromParent();
// Setting MI to the erase result keeps the iterator valid and increased.
@ -257,14 +299,15 @@ void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL,
// PPC::ZERO8 will be used for the first operand if the value is meant to
// be zero. In this case, the useSameRegister method will return false,
// thereby preventing this ISEL from being folded.
if (useSameRegister(TrueValue, FalseValue) && (BIL.size() == 1)) {
DEBUG(dbgs() << "Fold the ISEL instruction to an unconditonal copy.");
NumFolded++;
BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::ADDI8 : PPC::ADDI))
// Note: we're using both the TrueValue and FalseValue operands so as
// not to lose the kill flag if it is set on either of them.
BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::OR8 : PPC::OR))
.add(Dest)
.add(TrueValue)
.add(MachineOperand::CreateImm(0));
.add(FalseValue);
(*MI)->eraseFromParent();
// Setting MI to the erase result keeps the iterator valid and increased.
MI = BIL.erase(MI);

View File

@ -1,55 +1,66 @@
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; This file mainly tests that one of the ISEL instructions in the group uses the same register for operand RT, RA, RB
; This redundant ISEL is introduced during simple register coalescing stage.
; Simple register coalescing first creates the foldable ISEL instruction as we have seen in expand-foldable-isel.ll:
; %vreg85<def> = ISEL8 %vreg83, %vreg83, %vreg33:sub_eq
; Later the register coalescer figures out it could further coalesce %vreg85 with %vreg83:
; merge %vreg85:1@2288r into %vreg83:5@400B --> @400B
; erased: 2288r %vreg85<def> = COPY %vreg83
; After that we have:
; updated: 1504B %vreg83<def> = ISEL8 %vreg83, %vreg83, %vreg33:sub_eq
; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=true < %s | FileCheck %s --check-prefix=CHECK-GEN-ISEL-TRUE
; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck %s --implicit-check-not isel
; Function Attrs: norecurse nounwind readnone
@.str = private unnamed_addr constant [3 x i8] c"]]\00", align 1
@.str.1 = private unnamed_addr constant [35 x i8] c"Index < Length && \22Invalid index!\22\00", align 1
@.str.2 = private unnamed_addr constant [50 x i8] c"/home/jtony/src/llvm/include/llvm/ADT/StringRef.h\00", align 1
@__PRETTY_FUNCTION__._ZNK4llvm9StringRefixEm = private unnamed_addr constant [47 x i8] c"char llvm::StringRef::operator[](size_t) const\00", align 1
@.str.3 = private unnamed_addr constant [95 x i8] c"(data || length == 0) && \22StringRef cannot be built from a NULL argument with non-null length\22\00", align 1
@__PRETTY_FUNCTION__._ZN4llvm9StringRefC2EPKcm = private unnamed_addr constant [49 x i8] c"llvm::StringRef::StringRef(const char *, size_t)\00", align 1
; Function Attrs: nounwind
define i64 @_Z3fn1N4llvm9StringRefE([2 x i64] %Str.coerce) local_unnamed_addr #0 {
define i64 @_Z3fn1N4llvm9StringRefE([2 x i64] %Str.coerce) {
entry:
%Str.coerce.fca.0.extract = extractvalue [2 x i64] %Str.coerce, 0
%Str.coerce.fca.1.extract = extractvalue [2 x i64] %Str.coerce, 1
br label %while.cond.outer
while.cond.outer: ; preds = %_ZNK4llvm9StringRef6substrEmm.exit, %entry
while.cond.outer:
%Str.sroa.0.0.ph = phi i64 [ %8, %_ZNK4llvm9StringRef6substrEmm.exit ], [ %Str.coerce.fca.0.extract, %entry ]
%.sink.ph = phi i64 [ %sub.i, %_ZNK4llvm9StringRef6substrEmm.exit ], [ %Str.coerce.fca.1.extract, %entry ]
%BracketDepth.0.ph = phi i64 [ %BracketDepth.1, %_ZNK4llvm9StringRef6substrEmm.exit ], [ undef, %entry ]
%cmp65 = icmp eq i64 %BracketDepth.0.ph, 0
br i1 %cmp65, label %while.cond.us.preheader, label %while.cond.preheader
while.cond.us.preheader: ; preds = %while.cond.outer
while.cond.us.preheader:
br label %while.cond.us
while.cond.preheader: ; preds = %while.cond.outer
while.cond.preheader:
%cmp.i34129 = icmp eq i64 %.sink.ph, 0
br i1 %cmp.i34129, label %cond.false.i.loopexit135, label %_ZNK4llvm9StringRefixEm.exit.preheader
_ZNK4llvm9StringRefixEm.exit.preheader: ; preds = %while.cond.preheader
_ZNK4llvm9StringRefixEm.exit.preheader:
br label %_ZNK4llvm9StringRefixEm.exit
while.cond.us: ; preds = %while.cond.us.preheader, %_ZNK4llvm9StringRef6substrEmm.exit50.us
while.cond.us:
%Str.sroa.0.0.us = phi i64 [ %3, %_ZNK4llvm9StringRef6substrEmm.exit50.us ], [ %Str.sroa.0.0.ph, %while.cond.us.preheader ]
%.sink.us = phi i64 [ %sub.i41.us, %_ZNK4llvm9StringRef6substrEmm.exit50.us ], [ %.sink.ph, %while.cond.us.preheader ]
%cmp.i30.us = icmp ult i64 %.sink.us, 2
br i1 %cmp.i30.us, label %if.end.us, label %if.end.i.i.us
if.end.i.i.us: ; preds = %while.cond.us
if.end.i.i.us:
%0 = inttoptr i64 %Str.sroa.0.0.us to i8*
%call.i.i.us = tail call signext i32 @memcmp(i8* %0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i64 2) #3
%call.i.i.us = tail call signext i32 @memcmp(i8* %0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i64 2)
%phitmp.i.us = icmp eq i32 %call.i.i.us, 0
br i1 %phitmp.i.us, label %if.then, label %_ZNK4llvm9StringRefixEm.exit.us
if.end.us: ; preds = %while.cond.us
if.end.us:
%cmp.i34.us = icmp eq i64 %.sink.us, 0
br i1 %cmp.i34.us, label %cond.false.i.loopexit, label %_ZNK4llvm9StringRefixEm.exit.us
_ZNK4llvm9StringRefixEm.exit.us: ; preds = %if.end.i.i.us, %if.end.us
_ZNK4llvm9StringRefixEm.exit.us:
%1 = inttoptr i64 %Str.sroa.0.0.us to i8*
%2 = load i8, i8* %1, align 1, !tbaa !2
%2 = load i8, i8* %1, align 1
switch i8 %2, label %_ZNK4llvm9StringRef6substrEmm.exit.loopexit [
i8 92, label %if.then4.us
i8 93, label %if.then9
]
if.then4.us: ; preds = %_ZNK4llvm9StringRefixEm.exit.us
if.then4.us:
%.sroa.speculated12.i38.us = select i1 %cmp.i30.us, i64 %.sink.us, i64 2
%add.ptr.i40.us = getelementptr inbounds i8, i8* %1, i64 %.sroa.speculated12.i38.us
%sub.i41.us = sub i64 %.sink.us, %.sroa.speculated12.i38.us
@ -57,30 +68,30 @@ if.then4.us: ; preds = %_ZNK4llvm9StringRef
%cmp.i4.i45.us = icmp eq i64 %sub.i41.us, 0
%or.cond.i.i46.us = or i1 %tobool.i.i44.us, %cmp.i4.i45.us
br i1 %or.cond.i.i46.us, label %_ZNK4llvm9StringRef6substrEmm.exit50.us, label %cond.false.i.i47.loopexit
_ZNK4llvm9StringRef6substrEmm.exit50.us: ; preds = %if.then4.us
_ZNK4llvm9StringRef6substrEmm.exit50.us:
%3 = ptrtoint i8* %add.ptr.i40.us to i64
br label %while.cond.us
if.then: ; preds = %if.end.i.i.us
if.then:
ret i64 undef
cond.false.i.loopexit: ; preds = %if.end.us
cond.false.i.loopexit:
br label %cond.false.i
cond.false.i.loopexit134: ; preds = %_ZNK4llvm9StringRef6substrEmm.exit50
cond.false.i.loopexit134:
br label %cond.false.i
cond.false.i.loopexit135: ; preds = %while.cond.preheader
cond.false.i.loopexit135:
br label %cond.false.i
cond.false.i: ; preds = %cond.false.i.loopexit135, %cond.false.i.loopexit134, %cond.false.i.loopexit
tail call void @__assert_fail(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str.1, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @.str.2, i64 0, i64 0), i32 zeroext 225, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @__PRETTY_FUNCTION__._ZNK4llvm9StringRefixEm, i64 0, i64 0)) #4
cond.false.i:
tail call void @__assert_fail(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str.1, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @.str.2, i64 0, i64 0), i32 zeroext 225, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @__PRETTY_FUNCTION__._ZNK4llvm9StringRefixEm, i64 0, i64 0))
unreachable
_ZNK4llvm9StringRefixEm.exit: ; preds = %_ZNK4llvm9StringRefixEm.exit.preheader, %_ZNK4llvm9StringRef6substrEmm.exit50
_ZNK4llvm9StringRefixEm.exit:
%.sink131 = phi i64 [ %sub.i41, %_ZNK4llvm9StringRef6substrEmm.exit50 ], [ %.sink.ph, %_ZNK4llvm9StringRefixEm.exit.preheader ]
%Str.sroa.0.0130 = phi i64 [ %6, %_ZNK4llvm9StringRef6substrEmm.exit50 ], [ %Str.sroa.0.0.ph, %_ZNK4llvm9StringRefixEm.exit.preheader ]
%4 = inttoptr i64 %Str.sroa.0.0130 to i8*
%5 = load i8, i8* %4, align 1, !tbaa !2
%5 = load i8, i8* %4, align 1
switch i8 %5, label %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132 [
i8 92, label %if.then4
i8 93, label %if.end10
]
if.then4: ; preds = %_ZNK4llvm9StringRefixEm.exit
if.then4:
%cmp.i.i37 = icmp ult i64 %.sink131, 2
%.sroa.speculated12.i38 = select i1 %cmp.i.i37, i64 %.sink131, i64 2
%add.ptr.i40 = getelementptr inbounds i8, i8* %4, i64 %.sroa.speculated12.i38
@ -89,28 +100,28 @@ if.then4: ; preds = %_ZNK4llvm9StringRef
%cmp.i4.i45 = icmp eq i64 %sub.i41, 0
%or.cond.i.i46 = or i1 %tobool.i.i44, %cmp.i4.i45
br i1 %or.cond.i.i46, label %_ZNK4llvm9StringRef6substrEmm.exit50, label %cond.false.i.i47.loopexit133
cond.false.i.i47.loopexit: ; preds = %if.then4.us
cond.false.i.i47.loopexit:
br label %cond.false.i.i47
cond.false.i.i47.loopexit133: ; preds = %if.then4
cond.false.i.i47.loopexit133:
br label %cond.false.i.i47
cond.false.i.i47: ; preds = %cond.false.i.i47.loopexit133, %cond.false.i.i47.loopexit
tail call void @__assert_fail(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @.str.2, i64 0, i64 0), i32 zeroext 90, i8* getelementptr inbounds ([49 x i8], [49 x i8]* @__PRETTY_FUNCTION__._ZN4llvm9StringRefC2EPKcm, i64 0, i64 0)) #4
cond.false.i.i47:
tail call void @__assert_fail(i8* getelementptr inbounds ([95 x i8], [95 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @.str.2, i64 0, i64 0), i32 zeroext 90, i8* getelementptr inbounds ([49 x i8], [49 x i8]* @__PRETTY_FUNCTION__._ZN4llvm9StringRefC2EPKcm, i64 0, i64 0))
unreachable
_ZNK4llvm9StringRef6substrEmm.exit50: ; preds = %if.then4
_ZNK4llvm9StringRef6substrEmm.exit50:
%6 = ptrtoint i8* %add.ptr.i40 to i64
%cmp.i34 = icmp eq i64 %sub.i41, 0
br i1 %cmp.i34, label %cond.false.i.loopexit134, label %_ZNK4llvm9StringRefixEm.exit
if.then9: ; preds = %_ZNK4llvm9StringRefixEm.exit.us
tail call void @exit(i32 signext 1) #4
if.then9:
tail call void @exit(i32 signext 1)
unreachable
if.end10: ; preds = %_ZNK4llvm9StringRefixEm.exit
if.end10:
%dec = add i64 %BracketDepth.0.ph, -1
br label %_ZNK4llvm9StringRef6substrEmm.exit
_ZNK4llvm9StringRef6substrEmm.exit.loopexit: ; preds = %_ZNK4llvm9StringRefixEm.exit.us
_ZNK4llvm9StringRef6substrEmm.exit.loopexit:
br label %_ZNK4llvm9StringRef6substrEmm.exit
_ZNK4llvm9StringRef6substrEmm.exit.loopexit132: ; preds = %_ZNK4llvm9StringRefixEm.exit
_ZNK4llvm9StringRef6substrEmm.exit.loopexit132:
br label %_ZNK4llvm9StringRef6substrEmm.exit
_ZNK4llvm9StringRef6substrEmm.exit: ; preds = %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit, %if.end10
_ZNK4llvm9StringRef6substrEmm.exit:
%.sink76 = phi i64 [ %.sink131, %if.end10 ], [ %.sink.us, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit ], [ %.sink131, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132 ]
%7 = phi i8* [ %4, %if.end10 ], [ %1, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit ], [ %4, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132 ]
%BracketDepth.1 = phi i64 [ %dec, %if.end10 ], [ 0, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit ], [ %BracketDepth.0.ph, %_ZNK4llvm9StringRef6substrEmm.exit.loopexit132 ]
@ -120,7 +131,8 @@ _ZNK4llvm9StringRef6substrEmm.exit: ; preds = %_ZNK4llvm9StringRef
br label %while.cond.outer
; CHECK-LABEL: @_Z3fn1N4llvm9StringRefE
; CHECK-GEN-ISEL-TRUE: isel [[SAME:r[0-9]+]], [[SAME]], [[SAME]]
; Unnecessary ISEL (all the registers are the same) is always removed
; CHECK-GEN-ISEL-TRUE-NOT: isel [[SAME:r[0-9]+]], [[SAME]], [[SAME]]
; CHECK-GEN-ISEL-TRUE: isel [[SAME:r[0-9]+]], {{r[0-9]+}}, [[SAME]]
; CHECK: bc 12, eq, [[TRUE:.LBB[0-9]+]]
; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]]
@ -131,21 +143,6 @@ _ZNK4llvm9StringRef6substrEmm.exit: ; preds = %_ZNK4llvm9StringRef
; Function Attrs: noreturn nounwind
declare void @exit(i32 signext) local_unnamed_addr #1
; Function Attrs: nounwind readonly
declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #2
; Function Attrs: noreturn nounwind
declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*) local_unnamed_addr #1
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-power9-vector,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind readonly }
attributes #4 = { noreturn nounwind }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"clang version 4.0.0 (trunk 286863) (llvm/trunk 286967)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C++ TBAA"}
declare void @exit(i32 signext)
declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64)
declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*)

View File

@ -0,0 +1,71 @@
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; This file mainly tests the case that the two input registers of the ISEL instruction are the same register.
; The foldable ISEL in this test case is introduced at simple register coalescing stage.
; Before that stage we have:
; %vreg18<def> = ISEL8 %vreg5, %vreg2, %vreg15<undef>;
; At simple register coalescing stage, the register coalescer figures out it could remove the copy
; from %vreg2 to %vreg5, put the original value %X3 into %vreg5 directly
; erased: 336r %vreg5<def> = COPY %vreg2
; updated: 288B %vreg5<def> = COPY %X3;
; After that we have:
; updated: 416B %vreg18<def> = ISEL8 %vreg5, %vreg5, %vreg15<undef>;
; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=true < %s | FileCheck %s --check-prefix=CHECK-GEN-ISEL-TRUE
; RUN: llc -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck %s --implicit-check-not isel
%"struct.pov::ot_block_struct" = type { %"struct.pov::ot_block_struct"*, [3 x double], [3 x double], float, float, float, float, float, float, float, float, float, [3 x float], float, float, [3 x double], i16 }
%"struct.pov::ot_node_struct" = type { %"struct.pov::ot_id_struct", %"struct.pov::ot_block_struct"*, [8 x %"struct.pov::ot_node_struct"*] }
%"struct.pov::ot_id_struct" = type { i32, i32, i32, i32 }
define void @_ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot_id_structE(%"struct.pov::ot_block_struct"* %new_block) {
; CHECK-GEN-ISEL-TRUE-LABEL: _ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot_id_structE:
; Note: the following line folds the original isel (isel r4, r3, r3)
; CHECK-GEN-ISEL-TRUE: mr r4, r3
; CHECK-GEN-ISEL-TRUE: isel r29, r5, r6, 4*cr5+lt
; CHECK-GEN-ISEL-TRUE: blr
;
; CHECK-LABEL: _ZN3pov6ot_insEPPNS_14ot_node_structEPNS_15ot_block_structEPNS_12ot_id_structE:
; CHECK: mr r4, r3
; CHECK: bc 12, 4*cr5+lt, .LBB0_3
; CHECK: # %bb.2:
; CHECK: ori r29, r6, 0
; CHECK: b .LBB0_4
; CHECK: .LBB0_3:
; CHECK: addi r29, r5, 0
; CHECK: .LBB0_4:
; CHECK: blr
entry:
br label %while.cond11
while.cond11:
%this_node.0250 = phi %"struct.pov::ot_node_struct"* [ undef, %entry ], [ %1, %cond.false21.i156 ], [ %1, %cond.true18.i153 ]
%temp_id.sroa.21.1 = phi i32 [ undef, %entry ], [ %shr2039.i152, %cond.true18.i153 ], [ %div24.i155, %cond.false21.i156 ]
%0 = load i32, i32* undef, align 4
%cmp17 = icmp eq i32 0, %0
br i1 %cmp17, label %lor.rhs, label %while.body21
lor.rhs:
%Values = getelementptr inbounds %"struct.pov::ot_node_struct", %"struct.pov::ot_node_struct"* %this_node.0250, i64 0, i32 1
store %"struct.pov::ot_block_struct"* %new_block, %"struct.pov::ot_block_struct"** %Values, align 8
ret void
while.body21:
%call.i84 = tail call i8* @ZN3pov10pov_callocEmmPKciS1_pov()
store i8* %call.i84, i8** undef, align 8
%1 = bitcast i8* %call.i84 to %"struct.pov::ot_node_struct"*
br i1 undef, label %cond.true18.i153, label %cond.false21.i156
cond.true18.i153:
%shr2039.i152 = lshr i32 %temp_id.sroa.21.1, 1
br label %while.cond11
cond.false21.i156:
%add23.i154 = add nsw i32 %temp_id.sroa.21.1, 1
%div24.i155 = sdiv i32 %add23.i154, 2
br label %while.cond11
}
declare i8* @ZN3pov10pov_callocEmmPKciS1_pov()

View File

@ -0,0 +1,54 @@
# This file tests the scenario: ISEL RX, RX, RX, CR (X != 0),
# which is redundant and removed.
# RUN: llc -ppc-gen-isel=true -run-pass ppc-expand-isel -o - %s | FileCheck %s
--- |
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) {
entry:
%cmp = icmp sgt i32 %i, 0
%add = add nsw i32 %i, 1
%cond = select i1 %cmp, i32 %add, i32 %j
ret i32 %cond
}
...
---
name: testExpandISEL
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
- { reg: '%x3' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0.entry:
liveins: %x3
%r5 = ADDI %r3, 1
%cr0 = CMPWI %r3, 0
%r3 = ISEL %r3, %r3, %cr0gt
%x3 = EXTSW_32_64 %r3
; CHECK: %r5 = ADDI %r3, 1
; CHECK: %cr0 = CMPWI %r3, 0
; CHECK-NOT: %r3 = ISEL %r3, %r3, %cr0gt
; CHECK: %x3 = EXTSW_32_64 %r3
...

View File

@ -0,0 +1,54 @@
# This file tests the scenario: ISEL RX, RY, RY, CR (X != 0 && Y != 0)
# It is folded into a copy (%RX = OR %RY, %RY)
# RUN: llc -ppc-gen-isel=true -run-pass ppc-expand-isel -o - %s | FileCheck %s
--- |
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @testExpandISEL(i32 signext %i, i32 signext %j) {
entry:
%cmp = icmp sgt i32 %i, 0
%add = add nsw i32 %i, 1
%cond = select i1 %cmp, i32 %add, i32 %j
ret i32 %cond
}
...
---
name: testExpandISEL
alignment: 2
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
- { reg: '%x3' }
- { reg: '%x4' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
maxCallFrameSize: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
body: |
bb.0.entry:
liveins: %x3, %x4
%r5 = ADDI %r3, 1
%cr0 = CMPWI %r3, 0
%r3 = ISEL %r4, %r4, %cr0gt
; Test fold ISEL to a copy
; CHECK: %r3 = OR %r4, %r4
%x3 = EXTSW_32_64 %r3
...

View File

@ -1,7 +1,7 @@
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
; RUN: llc -ppc-gpr-icmps=all -verify-machineinstrs -O2 -ppc-asm-full-reg-names -mcpu=pwr7 -ppc-gen-isel=false < %s | FileCheck %s --implicit-check-not isel
; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELToIfElse(i32 signext %i, i32 signext %j) {
entry:
%cmp = icmp sgt i32 %i, 0
@ -23,7 +23,6 @@ entry:
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELToIf(i32 signext %i, i32 signext %j) {
entry:
%cmp = icmp sgt i32 %i, 0
@ -39,7 +38,6 @@ entry:
; CHECK-NEXT: blr
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELToElse(i32 signext %i, i32 signext %j) {
entry:
%cmp = icmp sgt i32 %i, 0
@ -53,22 +51,7 @@ entry:
; CHECK-NEXT: blr
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @testReplaceISELWithCopy(i32 signext %i, i32 signext %j) {
entry:
%cmp = icmp sgt i32 %i, 0
%cond = select i1 %cmp, i32 %j, i32 %j
ret i32 %cond
; CHECK-LABEL: @testReplaceISELWithCopy
; Fix me should really check: addi r3, r4, 0
; but for some reason it's optimized to mr r3, r4
; CHECK: mr r3, r4
; CHECK-NEXT: blr
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELToNull(i32 signext %i, i32 signext %j) {
entry:
%cmp = icmp sgt i32 %i, 0
@ -81,7 +64,6 @@ entry:
; CHECK: blr
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELsTo2ORIs2ADDIs
(i32 signext %a, i32 signext %b, i32 signext %d,
i32 signext %f, i32 signext %g) {
@ -108,7 +90,6 @@ entry:
; CHECK-NEXT: blr
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELsTo2ORIs1ADDI
(i32 signext %a, i32 signext %b, i32 signext %d,
i32 signext %f, i32 signext %g) {
@ -133,7 +114,6 @@ entry:
; CHECK-NEXT: blr
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELsTo1ORI1ADDI
(i32 signext %a, i32 signext %b, i32 signext %d,
i32 signext %f, i32 signext %g) {
@ -160,7 +140,6 @@ entry:
; CHECK-NEXT: blr
}
; Function Attrs: norecurse nounwind readnone
define signext i32 @testExpandISELsTo0ORI2ADDIs
(i32 signext %a, i32 signext %b, i32 signext %d,
i32 signext %f, i32 signext %g) {