1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[ARM] Add some missing thumb1 opcodes to enable peephole optimisation of CMPs

This adds a number of missing Thumb1 opcodes so that the peephole optimiser can
remove redundant CMP instructions.

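This reapplies the change after the first attempt broke non-Thumb1 code: the
t2ADDri instruction can take a frame index as an operand, so its operands must
be checked to be registers before they are read. In Thumb1, tADDframe is used
for frame indices instead.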

Differential Revision: https://reviews.llvm.org/D57833

llvm-svn: 354667
This commit is contained in:
David Green 2019-02-22 12:23:31 +00:00
parent b059d0c690
commit 62ceaab06e
4 changed files with 324 additions and 13 deletions

View File

@ -2542,6 +2542,7 @@ bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
return true;
case ARM::CMPrr:
case ARM::t2CMPrr:
case ARM::tCMPr:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = MI.getOperand(1).getReg();
CmpMask = ~0;
@ -2618,28 +2619,62 @@ inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
unsigned SrcReg, unsigned SrcReg2,
int ImmValue, const MachineInstr *OI) {
int ImmValue, const MachineInstr *OI,
bool &IsThumb1) {
if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
(OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
((OI->getOperand(1).getReg() == SrcReg &&
OI->getOperand(2).getReg() == SrcReg2) ||
(OI->getOperand(1).getReg() == SrcReg2 &&
OI->getOperand(2).getReg() == SrcReg)))
OI->getOperand(2).getReg() == SrcReg))) {
IsThumb1 = false;
return true;
}
if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
((OI->getOperand(2).getReg() == SrcReg &&
OI->getOperand(3).getReg() == SrcReg2) ||
(OI->getOperand(2).getReg() == SrcReg2 &&
OI->getOperand(3).getReg() == SrcReg))) {
IsThumb1 = true;
return true;
}
if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
(OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
OI->getOperand(1).getReg() == SrcReg &&
OI->getOperand(2).getImm() == ImmValue)
OI->getOperand(2).getImm() == ImmValue) {
IsThumb1 = false;
return true;
}
if (CmpI->getOpcode() == ARM::tCMPi8 &&
(OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
OI->getOperand(2).getReg() == SrcReg &&
OI->getOperand(3).getImm() == ImmValue) {
IsThumb1 = true;
return true;
}
if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
(OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
OI->getOperand(0).getReg() == SrcReg &&
OI->getOperand(1).getReg() == SrcReg2)
OI->getOperand(1).getReg() == SrcReg2) {
IsThumb1 = false;
return true;
}
if (CmpI->getOpcode() == ARM::tCMPr &&
(OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
OI->getOpcode() == ARM::tADDrr) &&
OI->getOperand(0).getReg() == SrcReg &&
OI->getOperand(2).getReg() == SrcReg2) {
IsThumb1 = true;
return true;
}
return false;
}
@ -2756,7 +2791,8 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
// Thus we cannot return here.
if (CmpInstr.getOpcode() == ARM::CMPri ||
CmpInstr.getOpcode() == ARM::t2CMPri)
CmpInstr.getOpcode() == ARM::t2CMPri ||
CmpInstr.getOpcode() == ARM::tCMPi8)
MI = nullptr;
else
return false;
@ -2800,11 +2836,13 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change. At the same time, search for SubAdd.
const TargetRegisterInfo *TRI = &getRegisterInfo();
bool SubAddIsThumb1 = false;
do {
const MachineInstr &Instr = *--I;
// Check whether CmpInstr can be made redundant by the current instruction.
if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr)) {
if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,
SubAddIsThumb1)) {
SubAdd = &*I;
break;
}
@ -2828,7 +2866,7 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// If we found a SubAdd, use it as it will be closer to the CMP
if (SubAdd) {
MI = SubAdd;
IsThumb1 = false;
IsThumb1 = SubAddIsThumb1;
}
// We can't use a predicated instruction - it doesn't always write the flags.
@ -2897,9 +2935,13 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// operands will be modified.
unsigned Opc = SubAdd->getOpcode();
bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
Opc == ARM::SUBri || Opc == ARM::t2SUBri;
if (!IsSub || (SrcReg2 != 0 && SubAdd->getOperand(1).getReg() == SrcReg2 &&
SubAdd->getOperand(2).getReg() == SrcReg)) {
Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
Opc == ARM::tSUBi8;
unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
if (!IsSub ||
(SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
// VSel doesn't support condition code update.
if (IsInstrVSel)
return false;
@ -2977,9 +3019,10 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
++Next;
unsigned SrcReg, SrcReg2;
int CmpMask, CmpValue;
bool IsThumb1;
if (Next != MI.getParent()->end() &&
analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI))
isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
return false;
return true;
}

View File

@ -38,8 +38,7 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 {
; ARM: movvc r[[R0]], #0
; ARM: mov pc, lr
; THUMBV6: adds r1, r0, r1
; THUMBV6: cmp r1, r0
; THUMBV6: adds r0, r0, r1
; THUMBV6: bvc .LBB1_2
; THUMBV7: adds r[[R2:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]

View File

@ -0,0 +1,226 @@
# RUN: llc -mtriple thumbv8m.base-none-eabi -run-pass=peephole-opt -verify-machineinstrs -o - %s | FileCheck %s
--- |
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv8m.base-none-none-eabi"
define i32 @test_subrr(i32 %a, i32 %b) { ret i32 %a }
define i32 @test_subrr_c(i32 %a, i32 %b) { ret i32 %a }
define i32 @test_subri3(i32 %a) { ret i32 %a }
define i32 @test_subri8(i32 %a) { ret i32 %a }
define i32 @test_addrr(i32 %a) { ret i32 %a }
define i32 @test_addri3(i32 %a) { ret i32 %a }
define i32 @test_addri8(i32 %a) { ret i32 %a }
...
---
name: test_subrr
liveins:
- { reg: '$r0', virtual-reg: '%1' }
- { reg: '$r1', virtual-reg: '%2' }
body: |
bb.0:
successors: %bb.2(0x40000000), %bb.1(0x40000000)
liveins: $r0, $r1
%2:tgpr = COPY $r1
%1:tgpr = COPY $r0
%0:tgpr, $cpsr = tSUBrr %2, %1, 14, $noreg
tCMPr %1, %2, 14, $noreg, implicit-def $cpsr
tBcc %bb.2, 3, $cpsr
tB %bb.1, 14, $noreg
bb.1:
$r0 = COPY %0
tBX_RET 14, $noreg, implicit $r0
bb.2:
%3:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
$r0 = COPY %3
tBX_RET 14, $noreg, implicit $r0
# CHECK-LABEL: name: test_subrr
# CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r1
# CHECK-NEXT: [[COPY0:%[0-9]+]]:tgpr = COPY $r0
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tSUBrr [[COPY1]], [[COPY0]], 14, $noreg
# CHECK-NEXT: tBcc %bb.2, 8, $cpsr
...
---
name: test_subrr_c
liveins:
- { reg: '$r0', virtual-reg: '%1' }
- { reg: '$r1', virtual-reg: '%2' }
body: |
bb.0:
successors: %bb.2(0x40000000), %bb.1(0x40000000)
liveins: $r0, $r1
%2:tgpr = COPY $r1
%1:tgpr = COPY $r0
%0:tgpr, $cpsr = tSUBrr %1, %2, 14, $noreg
tCMPr %1, %2, 14, $noreg, implicit-def $cpsr
tBcc %bb.2, 3, $cpsr
tB %bb.1, 14, $noreg
bb.1:
$r0 = COPY %0
tBX_RET 14, $noreg, implicit $r0
bb.2:
%3:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
$r0 = COPY %3
tBX_RET 14, $noreg, implicit $r0
# CHECK-LABEL: name: test_subrr_c
# CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r1
# CHECK-NEXT: [[COPY0:%[0-9]+]]:tgpr = COPY $r0
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tSUBrr [[COPY0]], [[COPY1]], 14, $noreg
# CHECK-NEXT: tBcc %bb.2, 3, $cpsr
...
---
name: test_subri3
liveins:
- { reg: '$r0', virtual-reg: '%1' }
body: |
bb.0:
successors: %bb.2(0x40000000), %bb.1(0x40000000)
liveins: $r0
%1:tgpr = COPY $r0
%0:tgpr, $cpsr = tSUBi3 %1, 1, 14, $noreg
tCMPi8 %1, 1, 14, $noreg, implicit-def $cpsr
tBcc %bb.2, 3, $cpsr
tB %bb.1, 14, $noreg
bb.1:
$r0 = COPY %0
tBX_RET 14, $noreg, implicit $r0
bb.2:
%2:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
$r0 = COPY %2
tBX_RET 14, $noreg, implicit $r0
# CHECK-LABEL: name: test_subri3
# CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tSUBi3 [[COPY]], 1, 14, $noreg
# CHECK-NEXT: tBcc %bb.2, 3, $cpsr
...
---
name: test_subri8
liveins:
- { reg: '$r0', virtual-reg: '%1' }
body: |
bb.0:
successors: %bb.2(0x40000000), %bb.1(0x40000000)
liveins: $r0
%1:tgpr = COPY $r0
%0:tgpr, $cpsr = tSUBi8 %1, 1, 14, $noreg
tCMPi8 %1, 1, 14, $noreg, implicit-def $cpsr
tBcc %bb.2, 3, $cpsr
tB %bb.1, 14, $noreg
bb.1:
$r0 = COPY %0
tBX_RET 14, $noreg, implicit $r0
bb.2:
%2:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
$r0 = COPY %2
tBX_RET 14, $noreg, implicit $r0
# CHECK-LABEL: name: test_subri8
# CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tSUBi8 [[COPY]], 1, 14, $noreg
# CHECK-NEXT: tBcc %bb.2, 3, $cpsr
...
---
name: test_addrr
liveins:
- { reg: '$r0', virtual-reg: '%1' }
- { reg: '$r1', virtual-reg: '%2' }
body: |
bb.0:
successors: %bb.2(0x40000000), %bb.1(0x40000000)
liveins: $r0, $r1
%2:tgpr = COPY $r1
%1:tgpr = COPY $r0
%0:tgpr, $cpsr = tADDrr %2, %1, 14, $noreg
tCMPr %0, %2, 14, $noreg, implicit-def $cpsr
tBcc %bb.2, 3, $cpsr
tB %bb.1, 14, $noreg
bb.1:
$r0 = COPY %0
tBX_RET 14, $noreg, implicit $r0
bb.2:
%3:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
$r0 = COPY %3
tBX_RET 14, $noreg, implicit $r0
# CHECK-LABEL: name: test_addrr
# CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r1
# CHECK-NEXT: [[COPY0:%[0-9]+]]:tgpr = COPY $r0
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tADDrr [[COPY1]], [[COPY0]], 14, $noreg
# CHECK-NEXT: tBcc %bb.2, 2, $cpsr
...
---
name: test_addri3
liveins:
- { reg: '$r0', virtual-reg: '%1' }
body: |
bb.0:
successors: %bb.2(0x40000000), %bb.1(0x40000000)
liveins: $r0
%0:tgpr = COPY $r0
%1:tgpr, $cpsr = tADDi3 %0, 1, 14, $noreg
tCMPr %1, %0, 14, $noreg, implicit-def $cpsr
tBcc %bb.2, 3, $cpsr
tB %bb.1, 14, $noreg
bb.1:
$r0 = COPY %0
tBX_RET 14, $noreg, implicit $r0
bb.2:
%2:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
$r0 = COPY %2
tBX_RET 14, $noreg, implicit $r0
# CHECK-LABEL: name: test_addri3
# CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tADDi3 [[COPY]], 1, 14, $noreg
# CHECK-NEXT: tBcc %bb.2, 2, $cpsr
...
---
name: test_addri8
liveins:
- { reg: '$r0', virtual-reg: '%1' }
body: |
bb.0:
successors: %bb.2(0x40000000), %bb.1(0x40000000)
liveins: $r0
%0:tgpr = COPY $r0
%1:tgpr, $cpsr = tADDi8 %0, 10, 14, $noreg
tCMPr %1, %0, 14, $noreg, implicit-def $cpsr
tBcc %bb.2, 3, $cpsr
tB %bb.1, 14, $noreg
bb.1:
$r0 = COPY %0
tBX_RET 14, $noreg, implicit $r0
bb.2:
%2:tgpr, dead $cpsr = tMOVi8 0, 14, $noreg
$r0 = COPY %2
tBX_RET 14, $noreg, implicit $r0
# CHECK-LABEL: name: test_addri8
# CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
# CHECK-NEXT: [[ADD:%[0-9]+]]:tgpr, $cpsr = tADDi8 [[COPY]], 10, 14, $noreg
# CHECK-NEXT: tBcc %bb.2, 2, $cpsr
...

View File

@ -0,0 +1,43 @@
# RUN: llc -run-pass=peephole-opt -verify-machineinstrs -o - %s | FileCheck %s
--- |
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7m-none-none-eabi"
define i32 @test_addir_frameindex(i32 %a) {
%f = alloca i32
ret i32 %a
}
...
---
name: test_addir_frameindex
liveins:
- { reg: '$r0', virtual-reg: '%0' }
stack:
- { id: 0, name: f, type: default, offset: 0, size: 1, alignment: 4,
stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
local-offset: -4, debug-info-variable: '', debug-info-expression: '',
debug-info-location: '' }
body: |
bb.0:
successors: %bb.2(0x40000000), %bb.1(0x40000000)
liveins: $r0
%0:rgpr = COPY $r0
%1:gprnopc = t2ADDri %stack.0.f, 0, 14, $noreg, $noreg
t2CMPrr %1, %0, 14, $noreg, implicit-def $cpsr
t2Bcc %bb.2, 3, $cpsr
t2B %bb.1, 14, $noreg
bb.1:
$r0 = COPY %1
tBX_RET 14, $noreg
bb.2:
$r0 = COPY %0
tBX_RET 14, $noreg
# CHECK-LABEL: name: test_addir_frameindex
# CHECK: %1:gprnopc = t2ADDri %stack.0.f, 0, 14, $noreg, $noreg
# CHECK-NEXT: t2CMPrr %1, %0, 14, $noreg, implicit-def $cpsr
# CHECK-NEXT: t2Bcc %bb.2, 3, $cpsr
...