1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[PowerPC] Remove CRBits Copy Of Unset/set CBit

For the situation where we generate the following code:

       crxor 8, 8, 8
       < Some instructions>
.LBB0_1:
       < Some instructions>
       cror 1, 8, 8

cror (COPY of CRbit) depends on the result of the crxor instruction.
CR8 is known to be zero as crxor is equivalent to CRUNSET. We can simply use
crxor 1, 1, 1 instead to zero out CR1, which does not have any dependency on
any previous instruction.

This patch will optimize it to:

        < Some instructions>
.LBB0_1:
        < Some instructions>
        crxor 1, 1, 1

Patch By: Victor Huang (NeHuang)

Differential Revision: https://reviews.llvm.org/D62044

llvm-svn: 361632
This commit is contained in:
Stefan Pintilie 2019-05-24 12:05:37 +00:00
parent 641f2653c4
commit 234a0edb79
4 changed files with 192 additions and 4 deletions

View File

@ -341,6 +341,8 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::V_SETALLONESB:
case PPC::V_SETALLONESH:
case PPC::V_SETALLONES:
case PPC::CRSET:
case PPC::CRUNSET:
return true;
}
return false;

View File

@ -2546,6 +2546,7 @@ def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD),
[(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>;
let isCodeGenOnly = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
"creqv $dst, $dst, $dst", IIC_BrCR,
[(set i1:$dst, 1)]>;
@ -2553,6 +2554,7 @@ def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
"crxor $dst, $dst, $dst", IIC_BrCR,
[(set i1:$dst, 0)]>;
}
let Defs = [CR1EQ], CRD = 6 in {
def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),

View File

@ -1,6 +1,6 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
@ -16,13 +16,19 @@
; Function Attrs: nounwind
define dso_local signext i32 @spillCRSET(i32 signext %p1, i32 signext %p2) {
; CHECK-LABEL: spillCRSET:
; CHECK: # %bb.0: # %entry
; CHECK: lis [[REG1:.*]], -32768
; CHECK: # %bb.2:
; CHECK-DAG: crnor [[CREG:.*]]*cr5+lt, eq, eq
; CHECK-DAG: mfocrf [[REG2:.*]], [[CREG]]
; CHECK-DAG: rlwinm [[REG2]], [[REG2]]
; CHECK: .LBB0_3:
; CHECK-DAG: creqv [[CREG:.*]]*cr5+lt, [[CREG]]*cr5+lt, [[CREG]]*cr5+lt
; CHECK: lis [[REG1:.*]], -32768
; CHECK: .LBB0_4:
; CHECK-NOT: mfocrf [[REG2:.*]], [[CREG]]
; CHECK-NOT: rlwinm [[REG2]], [[REG2]]
; CHECK: stw [[REG1]]
; CHECK: .LBB0_1: # %redo_first_pass
; CHECK: # %bb.5:
entry:
%tobool = icmp eq i32 %p2, 0
%tobool2 = icmp eq i32 %p1, 0

View File

@ -0,0 +1,178 @@
# RUN: llc -run-pass simple-register-coalescing %s -o - | FileCheck %s
--- |
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
@b = common dso_local local_unnamed_addr global i32 0, align 4
@d = common dso_local local_unnamed_addr global i32 0, align 4
@e = common dso_local local_unnamed_addr global i32* null, align 8
@c = common dso_local local_unnamed_addr global i32 0, align 4
@a = common dso_local local_unnamed_addr global [1 x i32] zeroinitializer, align 4
; Function Attrs: norecurse nounwind
define dso_local signext i32 @copycrunset() local_unnamed_addr #0 {
entry:
%0 = load i32, i32* @b, align 4
%tobool3 = icmp eq i32 %0, 0
br i1 %tobool3, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
%.pre = load i32, i32* @d, align 4
%tobool1 = icmp eq i32 %.pre, 0
br label %while.body
while.body: ; preds = %land.end, %while.body.preheader
br i1 %tobool1, label %land.end, label %land.rhs
land.rhs: ; preds = %while.body
%1 = load i32*, i32** @e, align 8
%2 = load i32, i32* %1, align 4
%idxprom = sext i32 %2 to i64
%arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* @a, i64 0, i64 %idxprom
%3 = load i32, i32* %arrayidx, align 4
%tobool2 = icmp ne i32 %3, 0
br label %land.end
land.end: ; preds = %land.rhs, %while.body
%4 = phi i1 [ false, %while.body ], [ %tobool2, %land.rhs ]
%land.ext = zext i1 %4 to i32
store i32 %land.ext, i32* @c, align 4
br label %while.body
while.end: ; preds = %entry
ret i32 undef
}
...
---
name: copycrunset
alignment: 4
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: crbitrc, preferred-register: '' }
- { id: 1, class: crbitrc, preferred-register: '' }
- { id: 2, class: crbitrc, preferred-register: '' }
- { id: 3, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 4, class: gprc, preferred-register: '' }
- { id: 5, class: crrc, preferred-register: '' }
- { id: 6, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 7, class: gprc, preferred-register: '' }
- { id: 8, class: crrc, preferred-register: '' }
- { id: 9, class: crbitrc, preferred-register: '' }
- { id: 10, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 11, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 12, class: g8rc, preferred-register: '' }
- { id: 13, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 14, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 15, class: g8rc, preferred-register: '' }
- { id: 16, class: gprc, preferred-register: '' }
- { id: 17, class: crrc, preferred-register: '' }
- { id: 18, class: crbitrc, preferred-register: '' }
- { id: 19, class: gprc_and_gprc_nor0, preferred-register: '' }
- { id: 20, class: gprc_and_gprc_nor0, preferred-register: '' }
- { id: 21, class: gprc, preferred-register: '' }
- { id: 22, class: g8rc_and_g8rc_nox0, preferred-register: '' }
- { id: 23, class: g8rc, preferred-register: '' }
- { id: 24, class: crbitrc, preferred-register: '' }
liveins:
- { reg: '$x2', virtual-reg: '' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 0
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.5(0x30000000), %bb.1(0x50000000)
liveins: $x2
%3:g8rc_and_g8rc_nox0 = ADDIStocHA $x2, @b
%4:gprc = LWZ target-flags(ppc-toc-lo) @b, killed %3, implicit $x2 :: (dereferenceable load 4 from @b)
%5:crrc = CMPLWI killed %4, 0
BCC 76, killed %5, %bb.5
B %bb.1
bb.1.while.body.preheader:
successors: %bb.2(0x80000000)
liveins: $x2
%6:g8rc_and_g8rc_nox0 = ADDIStocHA $x2, @d
%7:gprc = LWZ target-flags(ppc-toc-lo) @d, killed %6, implicit $x2 :: (dereferenceable load 4 from @d)
%8:crrc = CMPWI killed %7, 0
%0:crbitrc = COPY killed %8.sub_eq
%9:crbitrc = CRUNSET
%19:gprc_and_gprc_nor0 = LI 0
%20:gprc_and_gprc_nor0 = LI 1
%22:g8rc_and_g8rc_nox0 = ADDIStocHA $x2, @c
%10:g8rc_and_g8rc_nox0 = ADDIStocHA $x2, @e
%13:g8rc_and_g8rc_nox0 = ADDIStocHA $x2, @a
%14:g8rc_and_g8rc_nox0 = ADDItocL killed %13, @a, implicit $x2
bb.2.while.body:
successors: %bb.4(0x30000000), %bb.3(0x50000000)
liveins: $x2
%24:crbitrc = COPY %9
BC %0, %bb.4
B %bb.3
bb.3.land.rhs:
successors: %bb.4(0x80000000)
liveins: $x2
%11:g8rc_and_g8rc_nox0 = LD target-flags(ppc-toc-lo) @e, %10, implicit $x2 :: (dereferenceable load 8 from @e)
%12:g8rc = LWA 0, killed %11 :: (load 4 from %ir.1)
%15:g8rc = RLDICR killed %12, 2, 61
%16:gprc = LWZX %14, killed %15 :: (load 4 from %ir.arrayidx)
%17:crrc = CMPWI killed %16, 0
%18:crbitrc = COPY killed %17.sub_eq
%1:crbitrc = CRNOR killed %18, %18
%24:crbitrc = COPY killed %1
bb.4.land.end:
successors: %bb.2(0x80000000)
liveins: $x2
%2:crbitrc = COPY killed %24
%21:gprc = ISEL %20, %19, killed %2
STW killed %21, target-flags(ppc-toc-lo) @c, %22, implicit $x2 :: (store 4 into @c)
B %bb.2
bb.5.while.end:
%23:g8rc = LI8 0
$x3 = COPY killed %23
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
...
# The COPY of CRUNSET should be replaced by a rematerialized CRUNSET (and the original CRUNSET removed) by the simple register coalescing pass.
#CHECK-LABEL: copycrunset
#CHECK: bb.1.while.body.preheader:
#CHECK-NOT: %9:crbitrc = CRUNSET
#CHECK: bb.2.while.body:
#CHECK-NOT: %24:crbitrc = COPY %9
#CHECK: %24:crbitrc = CRUNSET
#CHECK: B %bb.3