1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[PowerPC] Utilize DQ-Form instructions for spill/restore and fix FrameIndex elimination to only use lis/ori if necessary.

Currently we produce a bunch of unnecessary code when emitting the
prologue/epilogue for spills/restores.  Namely, if the load from stack
slot/store to stack slot instruction is an X-Form instruction, we will
always produce an LIS/ORI sequence for the stack offset.

Furthermore, we have not exploited the P9 vector DQ-Form loads/stores
(LXV/STXV) for this purpose.

This patch addresses both issues.

Specifying the DQ-Form load/store as the instruction to use for stack
spills/reloads should be safe because:

1. The stack should be aligned according to the ABI
2. If the stack isn't aligned, PPCRegisterInfo::eliminateFrameIndex() will
   check for the offset being a multiple of 16 and will convert it to an
   X-Form instruction if it isn't.

Differential Revision : https://reviews.llvm.org/D38758

llvm-svn: 315500
This commit is contained in:
Lei Huang 2017-10-11 20:20:58 +00:00
parent 1a5684f1d0
commit 438bff63df
6 changed files with 224 additions and 247 deletions

View File

@ -281,7 +281,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
case PPC::RESTORE_CRBIT:
case PPC::LVX:
case PPC::LXVD2X:
case PPC::LXVX:
case PPC::LXV:
case PPC::QVLFDX:
case PPC::QVLFSXs:
case PPC::QVLFDXb:
@ -335,7 +335,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
case PPC::SPILL_CRBIT:
case PPC::STVX:
case PPC::STXVD2X:
case PPC::STXVX:
case PPC::STXV:
case PPC::QVSTFDX:
case PPC::QVSTFSXs:
case PPC::QVSTFDXb:
@ -1048,7 +1048,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
NonRI = true;
} else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
unsigned Op = Subtarget.hasP9Vector() ? PPC::STXVX : PPC::STXVD2X;
unsigned Op = Subtarget.hasP9Vector() ? PPC::STXV : PPC::STXVD2X;
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op))
.addReg(SrcReg,
getKillRegState(isKill)),
@ -1186,7 +1186,7 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
FrameIdx));
NonRI = true;
} else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
unsigned Op = Subtarget.hasP9Vector() ? PPC::LXVX : PPC::LXVD2X;
unsigned Op = Subtarget.hasP9Vector() ? PPC::LXV : PPC::LXVD2X;
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op), DestReg),
FrameIdx));
NonRI = true;

View File

@ -933,11 +933,16 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
SReg = MF.getRegInfo().createVirtualRegister(RC);
// Insert a set of rA with the full offset value before the ld, st, or add
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
.addImm(Offset >> 16);
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
.addReg(SRegHi, RegState::Kill)
.addImm(Offset);
if (isInt<16>(Offset))
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI), SReg)
.addImm(Offset);
else {
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
.addImm(Offset >> 16);
BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
.addReg(SRegHi, RegState::Kill)
.addImm(Offset);
}
// Convert into indexed form of the instruction:
//

View File

@ -2,8 +2,8 @@
; registers and with -fast-isel-abort=1 turned on the test case will then fail.
; When fastisel better supports VSX fix up this test case.
;
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx | FileCheck %s --check-prefix=ELF64LE
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx | FileCheck %s
; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 -mattr=-vsx | FileCheck %s --check-prefix=PPC970
;; Tests for 970 don't use -fast-isel-abort=1 because we intentionally punt
@ -13,17 +13,13 @@
define void @sitofp_single_i64(i64 %a, float %b) nounwind {
entry:
; ELF64: sitofp_single_i64
; ELF64LE: sitofp_single_i64
; CHECK: sitofp_single_i64
; PPC970: sitofp_single_i64
%b.addr = alloca float, align 4
%conv = sitofp i64 %a to float
; ELF64: std
; ELF64: lfd
; ELF64: fcfids
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfids
; CHECK: std
; CHECK: lfd
; CHECK: fcfids
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@ -34,21 +30,14 @@ entry:
define void @sitofp_single_i32(i32 %a, float %b) nounwind {
entry:
; ELF64: sitofp_single_i32
; ELF64LE: sitofp_single_i32
; CHECK: sitofp_single_i32
; PPC970: sitofp_single_i32
%b.addr = alloca float, align 4
%conv = sitofp i32 %a to float
; ELF64: std
; stack offset used to load the float: 65524 = -16 + 4
; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
; ELF64: lfiwax
; ELF64: fcfids
; ELF64LE: std
; stack offset used to load the float: 65520 = -16 + 0
; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
; ELF64LE: lfiwax
; ELF64LE: fcfids
; CHECK: std
; CHECK-NEXT: li
; CHECK-NEXT: lfiwax
; CHECK-NEXT: fcfids
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@ -59,19 +48,14 @@ entry:
define void @sitofp_single_i16(i16 %a, float %b) nounwind {
entry:
; ELF64: sitofp_single_i16
; ELF64LE: sitofp_single_i16
; CHECK: sitofp_single_i16
; PPC970: sitofp_single_i16
%b.addr = alloca float, align 4
%conv = sitofp i16 %a to float
; ELF64: extsh
; ELF64: std
; ELF64: lfd
; ELF64: fcfids
; ELF64LE: extsh
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfids
; CHECK: extsh
; CHECK: std
; CHECK: lfd
; CHECK: fcfids
; PPC970: extsh
; PPC970: std
; PPC970: lfd
@ -83,19 +67,14 @@ entry:
define void @sitofp_single_i8(i8 %a) nounwind {
entry:
; ELF64: sitofp_single_i8
; ELF64LE: sitofp_single_i8
; CHECK: sitofp_single_i8
; PPC970: sitofp_single_i8
%b.addr = alloca float, align 4
%conv = sitofp i8 %a to float
; ELF64: extsb
; ELF64: std
; ELF64: lfd
; ELF64: fcfids
; ELF64LE: extsb
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfids
; CHECK: extsb
; CHECK: std
; CHECK: lfd
; CHECK: fcfids
; PPC970: extsb
; PPC970: std
; PPC970: lfd
@ -107,21 +86,16 @@ entry:
define void @sitofp_double_i32(i32 %a, double %b) nounwind {
entry:
; ELF64: sitofp_double_i32
; ELF64LE: sitofp_double_i32
; CHECK: sitofp_double_i32
; PPC970: sitofp_double_i32
%b.addr = alloca double, align 8
%conv = sitofp i32 %a to double
; ELF64: std
; stack offset used to load the float: 65524 = -16 + 4
; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
; ELF64: lfiwax
; ELF64: fcfid
; ELF64LE: std
; stack offset used to load the float: 65520 = -16 + 0
; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
; ELF64LE: lfiwax
; ELF64LE: fcfid
; CHECK: std
; CHECK-NOT: ori
; CHECK: li
; CHECK-NOT: ori
; CHECK: lfiwax
; CHECK: fcfid
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@ -131,17 +105,13 @@ entry:
define void @sitofp_double_i64(i64 %a, double %b) nounwind {
entry:
; ELF64: sitofp_double_i64
; ELF64LE: sitofp_double_i64
; CHECK: sitofp_double_i64
; PPC970: sitofp_double_i64
%b.addr = alloca double, align 8
%conv = sitofp i64 %a to double
; ELF64: std
; ELF64: lfd
; ELF64: fcfid
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfid
; CHECK: std
; CHECK: lfd
; CHECK: fcfid
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@ -151,19 +121,14 @@ entry:
define void @sitofp_double_i16(i16 %a, double %b) nounwind {
entry:
; ELF64: sitofp_double_i16
; ELF64LE: sitofp_double_i16
; CHECK: sitofp_double_i16
; PPC970: sitofp_double_i16
%b.addr = alloca double, align 8
%conv = sitofp i16 %a to double
; ELF64: extsh
; ELF64: std
; ELF64: lfd
; ELF64: fcfid
; ELF64LE: extsh
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfid
; CHECK: extsh
; CHECK: std
; CHECK: lfd
; CHECK: fcfid
; PPC970: extsh
; PPC970: std
; PPC970: lfd
@ -174,19 +139,14 @@ entry:
define void @sitofp_double_i8(i8 %a, double %b) nounwind {
entry:
; ELF64: sitofp_double_i8
; ELF64LE: sitofp_double_i8
; CHECK: sitofp_double_i8
; PPC970: sitofp_double_i8
%b.addr = alloca double, align 8
%conv = sitofp i8 %a to double
; ELF64: extsb
; ELF64: std
; ELF64: lfd
; ELF64: fcfid
; ELF64LE: extsb
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfid
; CHECK: extsb
; CHECK: std
; CHECK: lfd
; CHECK: fcfid
; PPC970: extsb
; PPC970: std
; PPC970: lfd
@ -199,17 +159,13 @@ entry:
define void @uitofp_single_i64(i64 %a, float %b) nounwind {
entry:
; ELF64: uitofp_single_i64
; ELF64LE: uitofp_single_i64
; CHECK: uitofp_single_i64
; PPC970: uitofp_single_i64
%b.addr = alloca float, align 4
%conv = uitofp i64 %a to float
; ELF64: std
; ELF64: lfd
; ELF64: fcfidus
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfidus
; CHECK: std
; CHECK: lfd
; CHECK: fcfidus
; PPC970-NOT: fcfidus
store float %conv, float* %b.addr, align 4
ret void
@ -217,21 +173,16 @@ entry:
define void @uitofp_single_i32(i32 %a, float %b) nounwind {
entry:
; ELF64: uitofp_single_i32
; ELF64LE: uitofp_single_i32
; CHECK: uitofp_single_i32
; PPC970: uitofp_single_i32
%b.addr = alloca float, align 4
%conv = uitofp i32 %a to float
; ELF64: std
; stack offset used to load the float: 65524 = -16 + 4
; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
; ELF64: lfiwzx
; ELF64: fcfidus
; ELF64LE: std
; stack offset used to load the float: 65520 = -16 + 0
; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
; ELF64LE: lfiwzx
; ELF64LE: fcfidus
; CHECK: std
; CHECK-NOT: ori
; CHECK: li
; CHECK-NOT: ori
; CHECK: lfiwzx
; CHECK: fcfidus
; PPC970-NOT: lfiwzx
; PPC970-NOT: fcfidus
store float %conv, float* %b.addr, align 4
@ -240,19 +191,14 @@ entry:
define void @uitofp_single_i16(i16 %a, float %b) nounwind {
entry:
; ELF64: uitofp_single_i16
; ELF64LE: uitofp_single_i16
; CHECK: uitofp_single_i16
; PPC970: uitofp_single_i16
%b.addr = alloca float, align 4
%conv = uitofp i16 %a to float
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
; ELF64: std
; ELF64: lfd
; ELF64: fcfidus
; ELF64LE: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfidus
; CHECK: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
; CHECK: std
; CHECK: lfd
; CHECK: fcfidus
; PPC970: clrlwi {{[0-9]+}}, {{[0-9]+}}, 16
; PPC970: std
; PPC970: lfd
@ -264,19 +210,14 @@ entry:
define void @uitofp_single_i8(i8 %a) nounwind {
entry:
; ELF64: uitofp_single_i8
; ELF64LE: uitofp_single_i8
; CHECK: uitofp_single_i8
; PPC970: uitofp_single_i8
%b.addr = alloca float, align 4
%conv = uitofp i8 %a to float
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
; ELF64: std
; ELF64: lfd
; ELF64: fcfidus
; ELF64LE: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfidus
; CHECK: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
; CHECK: std
; CHECK: lfd
; CHECK: fcfidus
; PPC970: clrlwi {{[0-9]+}}, {{[0-9]+}}, 24
; PPC970: std
; PPC970: lfd
@ -288,17 +229,13 @@ entry:
define void @uitofp_double_i64(i64 %a, double %b) nounwind {
entry:
; ELF64: uitofp_double_i64
; ELF64LE: uitofp_double_i64
; CHECK: uitofp_double_i64
; PPC970: uitofp_double_i64
%b.addr = alloca double, align 8
%conv = uitofp i64 %a to double
; ELF64: std
; ELF64: lfd
; ELF64: fcfidu
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfidu
; CHECK: std
; CHECK: lfd
; CHECK: fcfidu
; PPC970-NOT: fcfidu
store double %conv, double* %b.addr, align 8
ret void
@ -306,21 +243,15 @@ entry:
define void @uitofp_double_i32(i32 %a, double %b) nounwind {
entry:
; ELF64: uitofp_double_i32
; ELF64LE: uitofp_double_i32
; CHECK: uitofp_double_i32
; PPC970: uitofp_double_i32
%b.addr = alloca double, align 8
%conv = uitofp i32 %a to double
; ELF64: std
; stack offset used to load the float: 65524 = -16 + 4
; ELF64: ori {{[0-9]+}}, {{[0-9]+}}, 65524
; ELF64: lfiwzx
; ELF64: fcfidu
; ELF64LE: std
; stack offset used to load the float: 65520 = -16 + 0
; ELF64LE: ori {{[0-9]+}}, {{[0-9]+}}, 65520
; ELF64LE: lfiwzx
; ELF64LE: fcfidu
; CHECK: std
; CHECK-NEXT: li
; CHECK-NEXT: lfiwzx
; CHECK-NEXT: fcfidu
; CHECKLE: fcfidu
; PPC970-NOT: lfiwzx
; PPC970-NOT: fcfidu
store double %conv, double* %b.addr, align 8
@ -329,19 +260,14 @@ entry:
define void @uitofp_double_i16(i16 %a, double %b) nounwind {
entry:
; ELF64: uitofp_double_i16
; ELF64LE: uitofp_double_i16
; CHECK: uitofp_double_i16
; PPC970: uitofp_double_i16
%b.addr = alloca double, align 8
%conv = uitofp i16 %a to double
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
; ELF64: std
; ELF64: lfd
; ELF64: fcfidu
; ELF64LE: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfidu
; CHECK: clrldi {{[0-9]+}}, {{[0-9]+}}, 48
; CHECK: std
; CHECK: lfd
; CHECK: fcfidu
; PPC970: clrlwi {{[0-9]+}}, {{[0-9]+}}, 16
; PPC970: std
; PPC970: lfd
@ -352,19 +278,14 @@ entry:
define void @uitofp_double_i8(i8 %a, double %b) nounwind {
entry:
; ELF64: uitofp_double_i8
; ELF64LE: uitofp_double_i8
; CHECK: uitofp_double_i8
; PPC970: uitofp_double_i8
%b.addr = alloca double, align 8
%conv = uitofp i8 %a to double
; ELF64: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
; ELF64: std
; ELF64: lfd
; ELF64: fcfidu
; ELF64LE: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfidu
; CHECK: clrldi {{[0-9]+}}, {{[0-9]+}}, 56
; CHECK: std
; CHECK: lfd
; CHECK: fcfidu
; PPC970: clrlwi {{[0-9]+}}, {{[0-9]+}}, 24
; PPC970: std
; PPC970: lfd
@ -377,17 +298,13 @@ entry:
define void @fptosi_float_i32(float %a) nounwind {
entry:
; ELF64: fptosi_float_i32
; ELF64LE: fptosi_float_i32
; CHECK: fptosi_float_i32
; PPC970: fptosi_float_i32
%b.addr = alloca i32, align 4
%conv = fptosi float %a to i32
; ELF64: fctiwz
; ELF64: stfd
; ELF64: lwa
; ELF64LE: fctiwz
; ELF64LE: stfd
; ELF64LE: lwa
; CHECK: fctiwz
; CHECK: stfd
; CHECK: lwa
; PPC970: fctiwz
; PPC970: stfd
; PPC970: lwa
@ -397,17 +314,13 @@ entry:
define void @fptosi_float_i64(float %a) nounwind {
entry:
; ELF64: fptosi_float_i64
; ELF64LE: fptosi_float_i64
; CHECK: fptosi_float_i64
; PPC970: fptosi_float_i64
%b.addr = alloca i64, align 4
%conv = fptosi float %a to i64
; ELF64: fctidz
; ELF64: stfd
; ELF64: ld
; ELF64LE: fctidz
; ELF64LE: stfd
; ELF64LE: ld
; CHECK: fctidz
; CHECK: stfd
; CHECK: ld
; PPC970: fctidz
; PPC970: stfd
; PPC970: ld
@ -417,17 +330,13 @@ entry:
define void @fptosi_double_i32(double %a) nounwind {
entry:
; ELF64: fptosi_double_i32
; ELF64LE: fptosi_double_i32
; CHECK: fptosi_double_i32
; PPC970: fptosi_double_i32
%b.addr = alloca i32, align 8
%conv = fptosi double %a to i32
; ELF64: fctiwz
; ELF64: stfd
; ELF64: lwa
; ELF64LE: fctiwz
; ELF64LE: stfd
; ELF64LE: lwa
; CHECK: fctiwz
; CHECK: stfd
; CHECK: lwa
; PPC970: fctiwz
; PPC970: stfd
; PPC970: lwa
@ -437,17 +346,13 @@ entry:
define void @fptosi_double_i64(double %a) nounwind {
entry:
; ELF64: fptosi_double_i64
; ELF64LE: fptosi_double_i64
; CHECK: fptosi_double_i64
; PPC970: fptosi_double_i64
%b.addr = alloca i64, align 8
%conv = fptosi double %a to i64
; ELF64: fctidz
; ELF64: stfd
; ELF64: ld
; ELF64LE: fctidz
; ELF64LE: stfd
; ELF64LE: ld
; CHECK: fctidz
; CHECK: stfd
; CHECK: ld
; PPC970: fctidz
; PPC970: stfd
; PPC970: ld
@ -459,17 +364,13 @@ entry:
define void @fptoui_float_i32(float %a) nounwind {
entry:
; ELF64: fptoui_float_i32
; ELF64LE: fptoui_float_i32
; CHECK: fptoui_float_i32
; PPC970: fptoui_float_i32
%b.addr = alloca i32, align 4
%conv = fptoui float %a to i32
; ELF64: fctiwuz
; ELF64: stfd
; ELF64: lwz
; ELF64LE: fctiwuz
; ELF64LE: stfd
; ELF64LE: lwz
; CHECK: fctiwuz
; CHECK: stfd
; CHECK: lwz
; PPC970: fctidz
; PPC970: stfd
; PPC970: lwz
@ -479,17 +380,13 @@ entry:
define void @fptoui_float_i64(float %a) nounwind {
entry:
; ELF64: fptoui_float_i64
; ELF64LE: fptoui_float_i64
; CHECK: fptoui_float_i64
; PPC970: fptoui_float_i64
%b.addr = alloca i64, align 4
%conv = fptoui float %a to i64
; ELF64: fctiduz
; ELF64: stfd
; ELF64: ld
; ELF64LE: fctiduz
; ELF64LE: stfd
; ELF64LE: ld
; CHECK: fctiduz
; CHECK: stfd
; CHECK: ld
; PPC970-NOT: fctiduz
store i64 %conv, i64* %b.addr, align 4
ret void
@ -497,17 +394,13 @@ entry:
define void @fptoui_double_i32(double %a) nounwind {
entry:
; ELF64: fptoui_double_i32
; ELF64LE: fptoui_double_i32
; CHECK: fptoui_double_i32
; PPC970: fptoui_double_i32
%b.addr = alloca i32, align 8
%conv = fptoui double %a to i32
; ELF64: fctiwuz
; ELF64: stfd
; ELF64: lwz
; ELF64LE: fctiwuz
; ELF64LE: stfd
; ELF64LE: lwz
; CHECK: fctiwuz
; CHECK: stfd
; CHECK: lwz
; PPC970: fctidz
; PPC970: stfd
; PPC970: lwz
@ -517,17 +410,13 @@ entry:
define void @fptoui_double_i64(double %a) nounwind {
entry:
; ELF64: fptoui_double_i64
; ELF64LE: fptoui_double_i64
; CHECK: fptoui_double_i64
; PPC970: fptoui_double_i64
%b.addr = alloca i64, align 8
%conv = fptoui double %a to i64
; ELF64: fctiduz
; ELF64: stfd
; ELF64: ld
; ELF64LE: fctiduz
; ELF64LE: stfd
; ELF64LE: ld
; CHECK: fctiduz
; CHECK: stfd
; CHECK: ld
; PPC970-NOT: fctiduz
store i64 %conv, i64* %b.addr, align 8
ret void

View File

@ -23,8 +23,11 @@ entry:
; CHECK-REG: blr
; CHECK-FISL: @foo1
; CHECK-FISL: lis 3, -1
; CHECK-FISL: ori 3, 3, 65384
; CHECK-FISL-NOT: lis
; CHECK-FISL-NOT: ori
; CHECK-FISL: li 3, -152
; CHECK-FISL-NOT: lis
; CHECK-FISL-NOT: ori
; CHECK-FISL: stxsdx 1, 1, 3
; CHECK-FISL: blr

View File

@ -235,8 +235,11 @@ entry:
; CHECK-FISL-LABEL: @test14
; CHECK-FISL: xxlor 0, 34, 35
; CHECK-FISL: xxlnor 34, 34, 35
; CHECK-FISL: lis 3, -1
; CHECK-FISL: ori 3, 3, 65520
; CHECK-FISL-NOT: lis
; CHECK-FISL-NOT: ori
; CHECK-FISL: li 3, -16
; CHECK-FISL-NOT: lis
; CHECK-FISL-NOT: ori
; CHECK-FISL: stxvd2x 0, 1, 3
; CHECK-FISL: blr
@ -260,8 +263,11 @@ entry:
; CHECK-FISL: xxlor 36, 0, 0
; CHECK-FISL: xxlnor 0, 34, 35
; CHECK-FISL: xxlor 34, 0, 0
; CHECK-FISL: lis 3, -1
; CHECK-FISL: ori 3, 3, 65520
; CHECK-FISL-NOT: lis
; CHECK-FISL-NOT: ori
; CHECK-FISL: li 3, -16
; CHECK-FISL-NOT: lis
; CHECK-FISL-NOT: ori
; CHECK-FISL: stxvd2x 36, 1, 3
; CHECK-FISL: blr
@ -285,8 +291,11 @@ entry:
; CHECK-FISL: xxlor 36, 0, 0
; CHECK-FISL: xxlnor 0, 34, 35
; CHECK-FISL: xxlor 34, 0, 0
; CHECK-FISL: lis 3, -1
; CHECK-FISL: ori 3, 3, 65520
; CHECK-FISL-NOT: lis
; CHECK-FISL-NOT: ori
; CHECK-FISL: li 3, -16
; CHECK-FISL-NOT: lis
; CHECK-FISL-NOT: ori
; CHECK-FISL: stxvd2x 36, 1, 3
; CHECK-FISL: blr
@ -330,8 +339,11 @@ entry:
; CHECK-FISL: xxlor 36, 0, 0
; CHECK-FISL: xxlandc 0, 34, 35
; CHECK-FISL: xxlor 34, 0, 0
; CHECK-FISL: lis 3, -1
; CHECK-FISL: ori 3, 3, 65520
; CHECK-FISL-NOT: lis
; CHECK-FISL-NOT: ori
; CHECK-FISL: li 3, -16
; CHECK-FISL-NOT: lis
; CHECK-FISL-NOT: ori
; CHECK-FISL: stxvd2x 36, 1, 3
; CHECK-FISL: blr
@ -355,8 +367,11 @@ entry:
; CHECK-FISL: xxlor 36, 0, 0
; CHECK-FISL: xxlandc 0, 34, 35
; CHECK-FISL: xxlor 34, 0, 0
; CHECK-FISL: lis 3, -1
; CHECK-FISL: ori 3, 3, 65520
; CHECK-FISL-NOT: lis
; CHECK-FISL-NOT: ori
; CHECK-FISL: li 3, -16
; CHECK-FISL-NOT: lis
; CHECK-FISL-NOT: ori
; CHECK-FISL: stxvd2x 36, 1, 3
; CHECK-FISL: blr

View File

@ -0,0 +1,65 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck -check-prefix=CHECK-PWR9 %s
; Spill/reload test for 16-byte vector values held across calls.
; %a and %b are used again after the calls to @test1 and @test2, and the
; expected output below shows registers 60-63 being spilled to and reloaded
; from the stack.
; The pwr8 run expects the indexed stxvd2x/lxvd2x forms, with each stack offset
; loaded into a register by a single li. The pwr9 run expects stxv/lxv with the
; stack offset encoded directly as a displacement from register 1.
define <4 x i32> @testSpill(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: testSpill:
; CHECK: li 11, 80
; CHECK: li 12, 96
; CHECK: li 3, 48
; CHECK: li 10, 64
; CHECK: stxvd2x 62, 1, 11 # 16-byte Folded Spill
; CHECK: stxvd2x 63, 1, 12 # 16-byte Folded Spill
; CHECK: stxvd2x 60, 1, 3 # 16-byte Folded Spill
; CHECK: stxvd2x 61, 1, 10 # 16-byte Folded Spill
; CHECK: li 9, 96
; CHECK: li 10, 80
; CHECK: li 11, 64
; CHECK: li 12, 48
; CHECK: lxvd2x 63, 1, 9 # 16-byte Folded Reload
; CHECK: lxvd2x 62, 1, 10 # 16-byte Folded Reload
; CHECK: lxvd2x 61, 1, 11 # 16-byte Folded Reload
; CHECK: lxvd2x 60, 1, 12 # 16-byte Folded Reload
; CHECK: mtlr 0
; CHECK-NEXT: blr
;
; CHECK-PWR9-LABEL: testSpill:
; CHECK-PWR9: stxv 62, 80(1) # 16-byte Folded Spill
; CHECK-PWR9: stxv 63, 96(1) # 16-byte Folded Spill
; CHECK-PWR9: stxv 60, 48(1) # 16-byte Folded Spill
; CHECK-PWR9: stxv 61, 64(1) # 16-byte Folded Spill
; CHECK-PWR9: lxv 63, 96(1) # 16-byte Folded Reload
; CHECK-PWR9: lxv 62, 80(1) # 16-byte Folded Reload
; CHECK-PWR9: lxv 61, 64(1) # 16-byte Folded Reload
; CHECK-PWR9: lxv 60, 48(1) # 16-byte Folded Reload
; CHECK-PWR9: mtlr 0
; CHECK-PWR9-NEXT: blr
entry:
; Branch on the result of the vcmpgtsw predicate intrinsic applied to %a and %b.
%0 = tail call i32 @llvm.ppc.altivec.vcmpgtsw.p(i32 2, <4 x i32> %a, <4 x i32> %b)
%tobool = icmp eq i32 %0, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
%call = tail call <4 x i32> @test1(<4 x i32> %a, <4 x i32> %b)
br label %if.end
if.else: ; preds = %entry
%call1 = tail call <4 x i32> @test2(<4 x i32> %b, <4 x i32> %a)
br label %if.end
if.end: ; preds = %if.else, %if.then
%c.0.in = phi <4 x i32> [ %call, %if.then ], [ %call1, %if.else ]
; Two more calls that take %a and %b; %a, %c.0.in and %call3 are all still
; needed by the adds that follow.
%call3 = tail call <4 x i32> @test1(<4 x i32> %b, <4 x i32> %a)
%call5 = tail call <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b)
%add4 = add <4 x i32> %a, <i32 0, i32 0, i32 2, i32 2>
%add6 = add <4 x i32> %add4, %c.0.in
%c.0 = add <4 x i32> %add6, %call3
%add7 = add <4 x i32> %c.0, %call5
ret <4 x i32> %add7
}
; Function Attrs: nounwind readnone
declare i32 @llvm.ppc.altivec.vcmpgtsw.p(i32, <4 x i32>, <4 x i32>)
declare <4 x i32> @test1(<4 x i32>, <4 x i32>)
declare <4 x i32> @test2(<4 x i32>, <4 x i32>)