mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
PPCDAGToDAGISel::PostprocessISelDAG()
This patch implements the PPCDAGToDAGISel::PostprocessISelDAG virtual method to perform post-selection peephole optimizations on the DAG representation. One optimization is implemented here: folds to clean up complex addressing expressions for thread-local storage and medium code model. It will also be useful for large code model sequences when those are added later. I originally thought about doing this on the MI representation prior to register assignment, but it's difficult to do effective global dead code elimination at that point. DCE is trivial on the DAG representation.
A typical example of a candidate code sequence in assembly:
    addis 3, 2, globalvar@toc@ha
    addi 3, 3, globalvar@toc@l
    lwz 5, 0(3)
When the final instruction is a load or store with an immediate offset of zero, the offset from the add-immediate can replace the zero, provided the relocation information is carried along:
    addis 3, 2, globalvar@toc@ha
    lwz 5, globalvar@toc@l(3)
Since the addi can in general have multiple uses, the instruction must be deleted only when its last use is removed. llvm-svn: 175697
This commit is contained in:
parent
7c7579badf
commit
0e7935e723
@ -67,6 +67,8 @@ namespace {
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void PostprocessISelDAG();
|
||||
|
||||
/// getI32Imm - Return a target constant with the specified value, of type
|
||||
/// i32.
|
||||
inline SDValue getI32Imm(unsigned Imm) {
|
||||
@ -1398,6 +1400,159 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
|
||||
return SelectCode(N);
|
||||
}
|
||||
|
||||
/// PostProcessISelDAG - Perform some late peephole optimizations
|
||||
/// on the DAG representation.
|
||||
void PPCDAGToDAGISel::PostprocessISelDAG() {
|
||||
|
||||
// Skip peepholes at -O0.
|
||||
if (TM.getOptLevel() == CodeGenOpt::None)
|
||||
return;
|
||||
|
||||
// These optimizations are currently supported only for 64-bit SVR4.
|
||||
if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64())
|
||||
return;
|
||||
|
||||
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
|
||||
++Position;
|
||||
|
||||
while (Position != CurDAG->allnodes_begin()) {
|
||||
SDNode *N = --Position;
|
||||
// Skip dead nodes and any non-machine opcodes.
|
||||
if (N->use_empty() || !N->isMachineOpcode())
|
||||
continue;
|
||||
|
||||
unsigned FirstOp;
|
||||
unsigned StorageOpcode = N->getMachineOpcode();
|
||||
|
||||
switch (StorageOpcode) {
|
||||
default: continue;
|
||||
|
||||
case PPC::LBZ:
|
||||
case PPC::LBZ8:
|
||||
case PPC::LD:
|
||||
case PPC::LFD:
|
||||
case PPC::LFS:
|
||||
case PPC::LHA:
|
||||
case PPC::LHA8:
|
||||
case PPC::LHZ:
|
||||
case PPC::LHZ8:
|
||||
case PPC::LWA:
|
||||
case PPC::LWZ:
|
||||
case PPC::LWZ8:
|
||||
FirstOp = 0;
|
||||
break;
|
||||
|
||||
case PPC::STB:
|
||||
case PPC::STB8:
|
||||
case PPC::STD:
|
||||
case PPC::STFD:
|
||||
case PPC::STFS:
|
||||
case PPC::STH:
|
||||
case PPC::STH8:
|
||||
case PPC::STW:
|
||||
case PPC::STW8:
|
||||
FirstOp = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
// If this is a load or store with a zero offset, we may be able to
|
||||
// fold an add-immediate into the memory operation.
|
||||
if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
|
||||
N->getConstantOperandVal(FirstOp) != 0)
|
||||
continue;
|
||||
|
||||
SDValue Base = N->getOperand(FirstOp + 1);
|
||||
if (!Base.isMachineOpcode())
|
||||
continue;
|
||||
|
||||
unsigned Flags = 0;
|
||||
bool ReplaceFlags = true;
|
||||
|
||||
// When the feeding operation is an add-immediate of some sort,
|
||||
// determine whether we need to add relocation information to the
|
||||
// target flags on the immediate operand when we fold it into the
|
||||
// load instruction.
|
||||
//
|
||||
// For something like ADDItocL, the relocation information is
|
||||
// inferred from the opcode; when we process it in the AsmPrinter,
|
||||
// we add the necessary relocation there. A load, though, can receive
|
||||
// relocation from various flavors of ADDIxxx, so we need to carry
|
||||
// the relocation information in the target flags.
|
||||
switch (Base.getMachineOpcode()) {
|
||||
default: continue;
|
||||
|
||||
case PPC::ADDI8:
|
||||
case PPC::ADDI8L:
|
||||
case PPC::ADDIL:
|
||||
// In some cases (such as TLS) the relocation information
|
||||
// is already in place on the operand, so copying the operand
|
||||
// is sufficient.
|
||||
ReplaceFlags = false;
|
||||
// For these cases, the immediate may not be divisible by 4, in
|
||||
// which case the fold is illegal for DS-form instructions. (The
|
||||
// other cases provide aligned addresses and are always safe.)
|
||||
if ((StorageOpcode == PPC::LWA ||
|
||||
StorageOpcode == PPC::LD ||
|
||||
StorageOpcode == PPC::STD) &&
|
||||
(!isa<ConstantSDNode>(Base.getOperand(1)) ||
|
||||
Base.getConstantOperandVal(1) % 4 != 0))
|
||||
continue;
|
||||
break;
|
||||
case PPC::ADDIdtprelL:
|
||||
Flags = PPCII::MO_DTPREL16_LO;
|
||||
break;
|
||||
case PPC::ADDItlsldL:
|
||||
Flags = PPCII::MO_TLSLD16_LO;
|
||||
break;
|
||||
case PPC::ADDItocL:
|
||||
Flags = PPCII::MO_TOC16_LO;
|
||||
break;
|
||||
}
|
||||
|
||||
// We found an opportunity. Reverse the operands from the add
|
||||
// immediate and substitute them into the load or store. If
|
||||
// needed, update the target flags for the immediate operand to
|
||||
// reflect the necessary relocation information.
|
||||
DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
|
||||
DEBUG(Base->dump(CurDAG));
|
||||
DEBUG(dbgs() << "\nN: ");
|
||||
DEBUG(N->dump(CurDAG));
|
||||
DEBUG(dbgs() << "\n");
|
||||
|
||||
SDValue ImmOpnd = Base.getOperand(1);
|
||||
|
||||
// If the relocation information isn't already present on the
|
||||
// immediate operand, add it now.
|
||||
if (ReplaceFlags) {
|
||||
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
|
||||
|
||||
if (GA) {
|
||||
DebugLoc dl = GA->getDebugLoc();
|
||||
const GlobalValue *GV = GA->getGlobal();
|
||||
ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
|
||||
} else {
|
||||
ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(ImmOpnd);
|
||||
if (CP) {
|
||||
const Constant *C = CP->getConstVal();
|
||||
ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
|
||||
CP->getAlignment(),
|
||||
0, Flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (FirstOp == 1) // Store
|
||||
(void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
|
||||
Base.getOperand(0), N->getOperand(3));
|
||||
else // Load
|
||||
(void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
|
||||
N->getOperand(2));
|
||||
|
||||
// The add-immediate may now be dead, in which case remove it.
|
||||
if (Base.getNode()->use_empty())
|
||||
CurDAG->RemoveDeadNode(Base.getNode());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// createPPCISelDag - This pass converts a legalized DAG into a
|
||||
|
25
test/CodeGen/PowerPC/mcm-10.ll
Normal file
25
test/CodeGen/PowerPC/mcm-10.ll
Normal file
@ -0,0 +1,25 @@
|
||||
; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
|
||||
|
||||
; Test peephole optimization for medium code model (32-bit TOC offsets)
|
||||
; for loading and storing a static variable scoped to a function.
|
||||
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
@test_fn_static.si = internal global i32 0, align 4
|
||||
|
||||
; Reads the i32 global @test_fn_static.si, stores the value plus one back
; to it, and returns the value that was read.
define signext i32 @test_fn_static() nounwind {
|
||||
entry:
|
||||
%0 = load i32* @test_fn_static.si, align 4
|
||||
%inc = add nsw i32 %0, 1
|
||||
store i32 %inc, i32* @test_fn_static.si, align 4
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
; CHECK: test_fn_static:
|
||||
; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
|
||||
; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
|
||||
; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
|
||||
; CHECK: .type [[VAR]],@object
|
||||
; CHECK: .local [[VAR]]
|
||||
; CHECK: .comm [[VAR]],4,4
|
27
test/CodeGen/PowerPC/mcm-11.ll
Normal file
27
test/CodeGen/PowerPC/mcm-11.ll
Normal file
@ -0,0 +1,27 @@
|
||||
; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
|
||||
|
||||
; Test peephole optimization for medium code model (32-bit TOC offsets)
|
||||
; for loading and storing a file-scope static variable.
|
||||
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
@gi = global i32 5, align 4
|
||||
|
||||
; Reads the i32 global @gi, stores the value plus one back to it, and
; returns the value that was read.
define signext i32 @test_file_static() nounwind {
|
||||
entry:
|
||||
%0 = load i32* @gi, align 4
|
||||
%inc = add nsw i32 %0, 1
|
||||
store i32 %inc, i32* @gi, align 4
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
; CHECK: test_file_static:
|
||||
; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
|
||||
; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
|
||||
; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
|
||||
; CHECK: .type [[VAR]],@object
|
||||
; CHECK: .data
|
||||
; CHECK: .globl [[VAR]]
|
||||
; CHECK: [[VAR]]:
|
||||
; CHECK: .long 5
|
18
test/CodeGen/PowerPC/mcm-12.ll
Normal file
18
test/CodeGen/PowerPC/mcm-12.ll
Normal file
@ -0,0 +1,18 @@
|
||||
; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
|
||||
|
||||
; Test peephole optimization for medium code model (32-bit TOC offsets)
|
||||
; for loading a value from the constant pool (TOC-relative).
|
||||
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Returns the double constant with bit pattern 0x3F4FD4920B498CF0.
define double @test_double_const() nounwind {
|
||||
entry:
|
||||
ret double 0x3F4FD4920B498CF0
|
||||
}
|
||||
|
||||
; CHECK: [[VAR:[a-z0-9A-Z_.]+]]:
|
||||
; CHECK: .quad 4562098671269285104
|
||||
; CHECK: test_double_const:
|
||||
; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
|
||||
; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
|
77
test/CodeGen/PowerPC/mcm-obj-2.ll
Normal file
77
test/CodeGen/PowerPC/mcm-obj-2.ll
Normal file
@ -0,0 +1,77 @@
|
||||
; RUN: llc -O1 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
|
||||
; RUN: elf-dump --dump-section-data | FileCheck %s
|
||||
|
||||
; FIXME: When asm-parse is available, could make this an assembly test.
|
||||
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
@test_fn_static.si = internal global i32 0, align 4
|
||||
|
||||
; Reads the i32 global @test_fn_static.si, stores the value plus one back
; to it, and returns the value that was read (one load and one store of the
; same symbol).
define signext i32 @test_fn_static() nounwind {
|
||||
entry:
|
||||
%0 = load i32* @test_fn_static.si, align 4
|
||||
%inc = add nsw i32 %0, 1
|
||||
store i32 %inc, i32* @test_fn_static.si, align 4
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
|
||||
; accessing function-scoped variable si.
|
||||
;
|
||||
; CHECK: Relocation 0
|
||||
; CHECK-NEXT: 'r_offset'
|
||||
; CHECK-NEXT: 'r_sym', 0x[[SYM2:[0-9]+]]
|
||||
; CHECK-NEXT: 'r_type', 0x00000032
|
||||
; CHECK: Relocation 1
|
||||
; CHECK-NEXT: 'r_offset'
|
||||
; CHECK-NEXT: 'r_sym', 0x[[SYM2]]
|
||||
; CHECK-NEXT: 'r_type', 0x00000030
|
||||
; CHECK: Relocation 2
|
||||
; CHECK-NEXT: 'r_offset'
|
||||
; CHECK-NEXT: 'r_sym', 0x[[SYM2]]
|
||||
; CHECK-NEXT: 'r_type', 0x00000030
|
||||
|
||||
@gi = global i32 5, align 4
|
||||
|
||||
; Reads the i32 global @gi, stores the value plus one back to it, and
; returns the value that was read (one load and one store of the same
; symbol).
define signext i32 @test_file_static() nounwind {
|
||||
entry:
|
||||
%0 = load i32* @gi, align 4
|
||||
%inc = add nsw i32 %0, 1
|
||||
store i32 %inc, i32* @gi, align 4
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
|
||||
; accessing file-scope variable gi.
|
||||
;
|
||||
; CHECK: Relocation 3
|
||||
; CHECK-NEXT: 'r_offset'
|
||||
; CHECK-NEXT: 'r_sym', 0x[[SYM3:[0-9]+]]
|
||||
; CHECK-NEXT: 'r_type', 0x00000032
|
||||
; CHECK: Relocation 4
|
||||
; CHECK-NEXT: 'r_offset'
|
||||
; CHECK-NEXT: 'r_sym', 0x[[SYM3]]
|
||||
; CHECK-NEXT: 'r_type', 0x00000030
|
||||
; CHECK: Relocation 5
|
||||
; CHECK-NEXT: 'r_offset'
|
||||
; CHECK-NEXT: 'r_sym', 0x[[SYM3]]
|
||||
; CHECK-NEXT: 'r_type', 0x00000030
|
||||
|
||||
; Returns the double constant with bit pattern 0x3F4FD4920B498CF0.
define double @test_double_const() nounwind {
|
||||
entry:
|
||||
ret double 0x3F4FD4920B498CF0
|
||||
}
|
||||
|
||||
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
|
||||
; accessing a constant.
|
||||
;
|
||||
; CHECK: Relocation 6
|
||||
; CHECK-NEXT: 'r_offset'
|
||||
; CHECK-NEXT: 'r_sym', 0x[[SYM4:[0-9]+]]
|
||||
; CHECK-NEXT: 'r_type', 0x00000032
|
||||
; CHECK: Relocation 7
|
||||
; CHECK-NEXT: 'r_offset'
|
||||
; CHECK-NEXT: 'r_sym', 0x[[SYM4]]
|
||||
; CHECK-NEXT: 'r_type', 0x00000030
|
||||
|
15
test/CodeGen/PowerPC/tls-2.ll
Normal file
15
test/CodeGen/PowerPC/tls-2.ll
Normal file
@ -0,0 +1,15 @@
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-freebsd10.0"
|
||||
; RUN: llc -O1 < %s -march=ppc64 | FileCheck %s
|
||||
|
||||
@a = thread_local global i32 0, align 4
|
||||
|
||||
;CHECK: localexec:
|
||||
; Stores the constant 42 to the thread_local i32 global @a and returns 0.
; The embedded CHECK lines expect the store to carry the a@tprel@l
; displacement directly, with no separate add-immediate.
define i32 @localexec() nounwind {
|
||||
entry:
|
||||
;CHECK: addis [[REG1:[0-9]+]], 13, a@tprel@ha
|
||||
;CHECK-NEXT: li [[REG2:[0-9]+]], 42
|
||||
;CHECK-NEXT: stw [[REG2]], a@tprel@l([[REG1]])
|
||||
store i32 42, i32* @a, align 4
|
||||
ret i32 0
|
||||
}
|
24
test/CodeGen/PowerPC/tls-ld-2.ll
Normal file
24
test/CodeGen/PowerPC/tls-ld-2.ll
Normal file
@ -0,0 +1,24 @@
|
||||
; RUN: llc -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck %s
|
||||
|
||||
; Test peephole optimization for thread-local storage using the
|
||||
; local dynamic model.
|
||||
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
@a = hidden thread_local global i32 0, align 4
|
||||
|
||||
; Allocates a local i32 slot, stores 0 to it, then loads the hidden
; thread_local i32 global @a and returns the loaded value.
define signext i32 @main() nounwind {
|
||||
entry:
|
||||
%retval = alloca i32, align 4
|
||||
store i32 0, i32* %retval
|
||||
%0 = load i32* @a, align 4
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
|
||||
; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l
|
||||
; CHECK-NEXT: bl __tls_get_addr(a@tlsld)
|
||||
; CHECK-NEXT: nop
|
||||
; CHECK-NEXT: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
|
||||
; CHECK-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]])
|
@ -1,6 +1,6 @@
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-freebsd10.0"
|
||||
; RUN: llc < %s -march=ppc64 | FileCheck %s
|
||||
; RUN: llc -O0 < %s -march=ppc64 | FileCheck %s
|
||||
|
||||
@a = thread_local global i32 0, align 4
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user