diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 14535065d55..8663dd41e5f 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -67,6 +67,8 @@ namespace { return true; } + virtual void PostprocessISelDAG(); + /// getI32Imm - Return a target constant with the specified value, of type /// i32. inline SDValue getI32Imm(unsigned Imm) { @@ -1398,6 +1400,159 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } +/// PostProcessISelDAG - Perform some late peephole optimizations +/// on the DAG representation. +void PPCDAGToDAGISel::PostprocessISelDAG() { + + // Skip peepholes at -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + + // These optimizations are currently supported only for 64-bit SVR4. + if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64()) + return; + + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; + + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = --Position; + // Skip dead nodes and any non-machine opcodes. + if (N->use_empty() || !N->isMachineOpcode()) + continue; + + unsigned FirstOp; + unsigned StorageOpcode = N->getMachineOpcode(); + + switch (StorageOpcode) { + default: continue; + + case PPC::LBZ: + case PPC::LBZ8: + case PPC::LD: + case PPC::LFD: + case PPC::LFS: + case PPC::LHA: + case PPC::LHA8: + case PPC::LHZ: + case PPC::LHZ8: + case PPC::LWA: + case PPC::LWZ: + case PPC::LWZ8: + FirstOp = 0; + break; + + case PPC::STB: + case PPC::STB8: + case PPC::STD: + case PPC::STFD: + case PPC::STFS: + case PPC::STH: + case PPC::STH8: + case PPC::STW: + case PPC::STW8: + FirstOp = 1; + break; + } + + // If this is a load or store with a zero offset, we may be able to + // fold an add-immediate into the memory operation. + if (!isa(N->getOperand(FirstOp)) || + N->getConstantOperandVal(FirstOp) != 0) + continue; + + SDValue Base = N->getOperand(FirstOp + 1); + if (!Base.isMachineOpcode()) + continue; + + unsigned Flags = 0; + bool ReplaceFlags = true; + + // When the feeding operation is an add-immediate of some sort, + // determine whether we need to add relocation information to the + // target flags on the immediate operand when we fold it into the + // load instruction. + // + // For something like ADDItocL, the relocation information is + // inferred from the opcode; when we process it in the AsmPrinter, + // we add the necessary relocation there. A load, though, can receive + // relocation from various flavors of ADDIxxx, so we need to carry + // the relocation information in the target flags. + switch (Base.getMachineOpcode()) { + default: continue; + + case PPC::ADDI8: + case PPC::ADDI8L: + case PPC::ADDIL: + // In some cases (such as TLS) the relocation information + // is already in place on the operand, so copying the operand + // is sufficient. + ReplaceFlags = false; + // For these cases, the immediate may not be divisible by 4, in + // which case the fold is illegal for DS-form instructions. (The + // other cases provide aligned addresses and are always safe.) + if ((StorageOpcode == PPC::LWA || + StorageOpcode == PPC::LD || + StorageOpcode == PPC::STD) && + (!isa(Base.getOperand(1)) || + Base.getConstantOperandVal(1) % 4 != 0)) + continue; + break; + case PPC::ADDIdtprelL: + Flags = PPCII::MO_DTPREL16_LO; + break; + case PPC::ADDItlsldL: + Flags = PPCII::MO_TLSLD16_LO; + break; + case PPC::ADDItocL: + Flags = PPCII::MO_TOC16_LO; + break; + } + + // We found an opportunity. Reverse the operands from the add + // immediate and substitute them into the load or store. If + // needed, update the target flags for the immediate operand to + // reflect the necessary relocation information. + DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); + DEBUG(Base->dump(CurDAG)); + DEBUG(dbgs() << "\nN: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + SDValue ImmOpnd = Base.getOperand(1); + + // If the relocation information isn't already present on the + // immediate operand, add it now. + if (ReplaceFlags) { + GlobalAddressSDNode *GA = dyn_cast(ImmOpnd); + + if (GA) { + DebugLoc dl = GA->getDebugLoc(); + const GlobalValue *GV = GA->getGlobal(); + ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); + } else { + ConstantPoolSDNode *CP = dyn_cast(ImmOpnd); + if (CP) { + const Constant *C = CP->getConstVal(); + ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, + CP->getAlignment(), + 0, Flags); + } + } + } + + if (FirstOp == 1) // Store + (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd, + Base.getOperand(0), N->getOperand(3)); + else // Load + (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0), + N->getOperand(2)); + + // The add-immediate may now be dead, in which case remove it. + if (Base.getNode()->use_empty()) + CurDAG->RemoveDeadNode(Base.getNode()); + } +} /// createPPCISelDag - This pass converts a legalized DAG into a diff --git a/test/CodeGen/PowerPC/mcm-10.ll b/test/CodeGen/PowerPC/mcm-10.ll new file mode 100644 index 00000000000..4bec3e16fa0 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-10.ll @@ -0,0 +1,25 @@ +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s + +; Test peephole optimization for medium code model (32-bit TOC offsets) +; for loading and storing a static variable scoped to a function. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@test_fn_static.si = internal global i32 0, align 4 + +define signext i32 @test_fn_static() nounwind { +entry: + %0 = load i32* @test_fn_static.si, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @test_fn_static.si, align 4 + ret i32 %0 +} + +; CHECK: test_fn_static: +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK: .type [[VAR]],@object +; CHECK: .local [[VAR]] +; CHECK: .comm [[VAR]],4,4 diff --git a/test/CodeGen/PowerPC/mcm-11.ll b/test/CodeGen/PowerPC/mcm-11.ll new file mode 100644 index 00000000000..f2bc4c9cb72 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-11.ll @@ -0,0 +1,27 @@ +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s + +; Test peephole optimization for medium code model (32-bit TOC offsets) +; for loading and storing a file-scope static variable. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@gi = global i32 5, align 4 + +define signext i32 @test_file_static() nounwind { +entry: + %0 = load i32* @gi, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @gi, align 4 + ret i32 %0 +} + +; CHECK: test_file_static: +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK: lwz {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK: .type [[VAR]],@object +; CHECK: .data +; CHECK: .globl [[VAR]] +; CHECK: [[VAR]]: +; CHECK: .long 5 diff --git a/test/CodeGen/PowerPC/mcm-12.ll b/test/CodeGen/PowerPC/mcm-12.ll new file mode 100644 index 00000000000..911305d4355 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-12.ll @@ -0,0 +1,18 @@ +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s + +; Test peephole optimization for medium code model (32-bit TOC offsets) +; for loading a value from the constant pool (TOC-relative). + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define double @test_double_const() nounwind { +entry: + ret double 0x3F4FD4920B498CF0 +} + +; CHECK: [[VAR:[a-z0-9A-Z_.]+]]: +; CHECK: .quad 4562098671269285104 +; CHECK: test_double_const: +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha +; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll new file mode 100644 index 00000000000..2dd1718ba75 --- /dev/null +++ b/test/CodeGen/PowerPC/mcm-obj-2.ll @@ -0,0 +1,77 @@ +; RUN: llc -O1 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \ +; RUN: elf-dump --dump-section-data | FileCheck %s + +; FIXME: When asm-parse is available, could make this an assembly test. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@test_fn_static.si = internal global i32 0, align 4 + +define signext i32 @test_fn_static() nounwind { +entry: + %0 = load i32* @test_fn_static.si, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @test_fn_static.si, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for +; accessing function-scoped variable si. +; +; CHECK: Relocation 0 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM2:[0-9]+]] +; CHECK-NEXT: 'r_type', 0x00000032 +; CHECK: Relocation 1 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM2]] +; CHECK-NEXT: 'r_type', 0x00000030 +; CHECK: Relocation 2 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM2]] +; CHECK-NEXT: 'r_type', 0x00000030 + +@gi = global i32 5, align 4 + +define signext i32 @test_file_static() nounwind { +entry: + %0 = load i32* @gi, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* @gi, align 4 + ret i32 %0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for +; accessing file-scope variable gi. +; +; CHECK: Relocation 3 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM3:[0-9]+]] +; CHECK-NEXT: 'r_type', 0x00000032 +; CHECK: Relocation 4 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM3]] +; CHECK-NEXT: 'r_type', 0x00000030 +; CHECK: Relocation 5 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM3]] +; CHECK-NEXT: 'r_type', 0x00000030 + +define double @test_double_const() nounwind { +entry: + ret double 0x3F4FD4920B498CF0 +} + +; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for +; accessing a constant. +; +; CHECK: Relocation 6 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM4:[0-9]+]] +; CHECK-NEXT: 'r_type', 0x00000032 +; CHECK: Relocation 7 +; CHECK-NEXT: 'r_offset' +; CHECK-NEXT: 'r_sym', 0x[[SYM4]] +; CHECK-NEXT: 'r_type', 0x00000030 + diff --git a/test/CodeGen/PowerPC/tls-2.ll b/test/CodeGen/PowerPC/tls-2.ll new file mode 100644 index 00000000000..20d8fe46ea1 --- /dev/null +++ b/test/CodeGen/PowerPC/tls-2.ll @@ -0,0 +1,15 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-freebsd10.0" +; RUN: llc -O1 < %s -march=ppc64 | FileCheck %s + +@a = thread_local global i32 0, align 4 + +;CHECK: localexec: +define i32 @localexec() nounwind { +entry: +;CHECK: addis [[REG1:[0-9]+]], 13, a@tprel@ha +;CHECK-NEXT: li [[REG2:[0-9]+]], 42 +;CHECK-NEXT: stw [[REG2]], a@tprel@l([[REG1]]) + store i32 42, i32* @a, align 4 + ret i32 0 +} diff --git a/test/CodeGen/PowerPC/tls-ld-2.ll b/test/CodeGen/PowerPC/tls-ld-2.ll new file mode 100644 index 00000000000..4954afeb24f --- /dev/null +++ b/test/CodeGen/PowerPC/tls-ld-2.ll @@ -0,0 +1,24 @@ +; RUN: llc -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck %s + +; Test peephole optimization for thread-local storage using the +; local dynamic model. + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@a = hidden thread_local global i32 0, align 4 + +define signext i32 @main() nounwind { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = load i32* @a, align 4 + ret i32 %0 +} + +; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha +; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l +; CHECK-NEXT: bl __tls_get_addr(a@tlsld) +; CHECK-NEXT: nop +; CHECK-NEXT: addis [[REG2:[0-9]+]], 3, a@dtprel@ha +; CHECK-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]]) diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll index 713893bf586..9021f03b9d8 100644 --- a/test/CodeGen/PowerPC/tls.ll +++ b/test/CodeGen/PowerPC/tls.ll @@ -1,6 +1,6 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-freebsd10.0" -; RUN: llc < %s -march=ppc64 | FileCheck %s +; RUN: llc -O0 < %s -march=ppc64 | FileCheck %s @a = thread_local global i32 0, align 4