1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[ARM] Comply with rules on ARMv8-A thumb mode partial deprecation of IT.

Summary:
When identifying instructions that can be folded into a MOVCC instruction,
checking for a predicate operand is not enough; for a Thumb2 function
compiled with restrict-IT, we also need to check whether the machine
instruction is eligible for an ARMv8 IT block.

Notes in ARMv8-A Architecture Reference Manual, section "Partial deprecation of IT"
  https://usermanual.wiki/Pdf/ARM20Architecture20Reference20ManualARMv8.1667877052.pdf

"ARMv8-A deprecates some uses of the T32 IT instruction. All uses of IT that apply to
instructions other than a single subsequent 16-bit instruction from a restricted set
are deprecated, as are explicit references to the PC within that single 16-bit
instruction. This permits the non-deprecated forms of IT and subsequent instructions
to be treated as a single 32-bit conditional instruction."

Reviewers: efriedma, lebedev.ri, t.p.northover, jmolloy, aemerson, compnerd, stoklund, ostannard

Reviewed By: ostannard

Subscribers: ostannard, javed.absar, kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63474

llvm-svn: 363739
This commit is contained in:
Huihui Zhang 2019-06-18 20:55:09 +00:00
parent 62272f1feb
commit 8b112ee806
9 changed files with 90 additions and 53 deletions

View File

@ -2042,9 +2042,9 @@ MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) {
MachineInstr *
ARMBaseInstrInfo::canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) const {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
@ -2052,8 +2052,8 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
return nullptr;
// MI is folded into the MOVCC by predicating it.
if (!MI->isPredicable())
// Check if MI can be predicated and folded into the MOVCC.
if (!isPredicable(*MI))
return nullptr;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.

View File

@ -398,6 +398,11 @@ private:
void expandMEMCPY(MachineBasicBlock::iterator) const;
/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
MachineInstr *canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) const;
private:
/// Modeling special VFP / NEON fp MLA / MLS hazards.
@ -526,12 +531,6 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr &MI, unsigned &PredReg);
unsigned getMatchingCondBranchOpcode(unsigned Opc);
/// Determine if MI can be folded into an ARM MOVCC instruction, and return the
/// opcode of the SSA instruction representing the conditional MI.
unsigned canFoldARMInstrIntoMOVCC(unsigned Reg,
MachineInstr *&MI,
const MachineRegisterInfo &MRI);
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
/// the instruction is encoded with an 'S' bit is determined by the optional
/// CPSR def operand.

View File

@ -16,8 +16,9 @@ if.end: ; preds = %if.then, %entry
}
; CHECK-LABEL: function
; CHECK: mul r2, r0, r0
; CHECK: cmp r0, r1
; CHECK-NOT: mulseq r0, r0, r0
; CHECK: muleq r0, r0, r0
; CHECK: moveq r0, r2
; CHECK: bx lr

View File

@ -235,7 +235,7 @@ entry:
}
; CHECK-COMMON-LABEL: icmp_eq_minus_one
; CHECK-COMMON: cmp r0, #255
; CHECK-COMMON: cmp {{r[0-9]+}}, #255
define i32 @icmp_eq_minus_one(i8* %ptr) {
%load = load i8, i8* %ptr, align 1
%conv = zext i8 %load to i32

View File

@ -54,9 +54,9 @@ entry:
; CHECK-DSP: cmp
; CHECK-DSP: cmp
; CHECK-DSP-IMM: sxth [[ARG:r[0-9]+]], r2
; CHECK-DSP-IMM: uadd16 [[ADD:r[0-9]+]],
; CHECK-DSP-IMM: sxth.w [[SEXT:r[0-9]+]], [[ADD]]
; CHECK-DSP-IMM: sxth [[ARG:r[0-9]+]], r2
; CHECK-DSP-IMM: cmp [[SEXT]], [[ARG]]
; CHECK-DSP-IMM-NOT: uxt
; CHECK-DSP-IMM: movs [[ONE:r[0-9]+]], #1

View File

@ -168,10 +168,11 @@ define i32 @test_tst_assessment(i32 %a, i32 %b) {
;
; V8-LABEL: test_tst_assessment:
; V8: @ %bb.0:
; V8-NEXT: and r0, r0, #1
; V8-NEXT: and r2, r0, #1
; V8-NEXT: subs r0, r2, #1
; V8-NEXT: lsls r1, r1, #31
; V8-NEXT: it ne
; V8-NEXT: subne r0, #1
; V8-NEXT: it eq
; V8-NEXT: moveq r0, r2
; V8-NEXT: bx lr
%and1 = and i32 %a, 1
%sub = sub i32 %and1, 1

View File

@ -440,24 +440,25 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
;
; THUMB8-LABEL: scalar_i64_signbit_eq:
; THUMB8: @ %bb.0:
; THUMB8-NEXT: .save {r7, lr}
; THUMB8-NEXT: push {r7, lr}
; THUMB8-NEXT: rsb.w r3, r2, #32
; THUMB8-NEXT: sub.w lr, r2, #32
; THUMB8-NEXT: .save {r4, lr}
; THUMB8-NEXT: push {r4, lr}
; THUMB8-NEXT: rsb.w r4, r2, #32
; THUMB8-NEXT: sub.w r3, r2, #32
; THUMB8-NEXT: mov.w r12, #-2147483648
; THUMB8-NEXT: cmp.w lr, #0
; THUMB8-NEXT: lsl.w r3, r12, r3
; THUMB8-NEXT: cmp r3, #0
; THUMB8-NEXT: lsl.w r4, r12, r4
; THUMB8-NEXT: lsr.w r2, r12, r2
; THUMB8-NEXT: lsr.w lr, r12, r3
; THUMB8-NEXT: it ge
; THUMB8-NEXT: lsrge.w r3, r12, lr
; THUMB8-NEXT: movge r4, lr
; THUMB8-NEXT: it ge
; THUMB8-NEXT: movge r2, #0
; THUMB8-NEXT: ands r0, r3
; THUMB8-NEXT: ands r0, r4
; THUMB8-NEXT: ands r1, r2
; THUMB8-NEXT: orrs r0, r1
; THUMB8-NEXT: clz r0, r0
; THUMB8-NEXT: lsrs r0, r0, #5
; THUMB8-NEXT: pop {r7, pc}
; THUMB8-NEXT: pop {r4, pc}
%t0 = lshr i64 9223372036854775808, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
@ -615,28 +616,29 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
;
; THUMB8-LABEL: scalar_i64_bitsinmiddle_eq:
; THUMB8: @ %bb.0:
; THUMB8-NEXT: .save {r7, lr}
; THUMB8-NEXT: push {r7, lr}
; THUMB8-NEXT: .save {r4, lr}
; THUMB8-NEXT: push {r4, lr}
; THUMB8-NEXT: movs r3, #0
; THUMB8-NEXT: movw lr, #65535
; THUMB8-NEXT: movt r3, #65535
; THUMB8-NEXT: lsr.w r12, r3, r2
; THUMB8-NEXT: rsb.w r3, r2, #32
; THUMB8-NEXT: lsl.w r3, lr, r3
; THUMB8-NEXT: orr.w r3, r3, r12
; THUMB8-NEXT: sub.w r12, r2, #32
; THUMB8-NEXT: cmp.w r12, #0
; THUMB8-NEXT: orr.w r12, r12, r3
; THUMB8-NEXT: sub.w r3, r2, #32
; THUMB8-NEXT: lsr.w r2, lr, r2
; THUMB8-NEXT: it ge
; THUMB8-NEXT: lsrge.w r3, lr, r12
; THUMB8-NEXT: cmp r3, #0
; THUMB8-NEXT: lsr.w r4, lr, r3
; THUMB8-NEXT: it lt
; THUMB8-NEXT: movlt r4, r12
; THUMB8-NEXT: it ge
; THUMB8-NEXT: movge r2, #0
; THUMB8-NEXT: ands r0, r3
; THUMB8-NEXT: ands r0, r4
; THUMB8-NEXT: ands r1, r2
; THUMB8-NEXT: orrs r0, r1
; THUMB8-NEXT: clz r0, r0
; THUMB8-NEXT: lsrs r0, r0, #5
; THUMB8-NEXT: pop {r7, pc}
; THUMB8-NEXT: pop {r4, pc}
%t0 = lshr i64 281474976645120, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0

View File

@ -518,24 +518,25 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
;
; THUMB8-LABEL: scalar_i64_lowestbit_eq:
; THUMB8: @ %bb.0:
; THUMB8-NEXT: .save {r7, lr}
; THUMB8-NEXT: push {r7, lr}
; THUMB8-NEXT: rsb.w r3, r2, #32
; THUMB8-NEXT: sub.w lr, r2, #32
; THUMB8-NEXT: .save {r4, lr}
; THUMB8-NEXT: push {r4, lr}
; THUMB8-NEXT: rsb.w r4, r2, #32
; THUMB8-NEXT: sub.w r3, r2, #32
; THUMB8-NEXT: mov.w r12, #1
; THUMB8-NEXT: cmp.w lr, #0
; THUMB8-NEXT: lsr.w r3, r12, r3
; THUMB8-NEXT: cmp r3, #0
; THUMB8-NEXT: lsr.w r4, r12, r4
; THUMB8-NEXT: lsl.w r2, r12, r2
; THUMB8-NEXT: lsl.w lr, r12, r3
; THUMB8-NEXT: it ge
; THUMB8-NEXT: lslge.w r3, r12, lr
; THUMB8-NEXT: movge r4, lr
; THUMB8-NEXT: it ge
; THUMB8-NEXT: movge r2, #0
; THUMB8-NEXT: ands r1, r3
; THUMB8-NEXT: ands r1, r4
; THUMB8-NEXT: ands r0, r2
; THUMB8-NEXT: orrs r0, r1
; THUMB8-NEXT: clz r0, r0
; THUMB8-NEXT: lsrs r0, r0, #5
; THUMB8-NEXT: pop {r7, pc}
; THUMB8-NEXT: pop {r4, pc}
%t0 = shl i64 1, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
@ -635,28 +636,29 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
;
; THUMB8-LABEL: scalar_i64_bitsinmiddle_eq:
; THUMB8: @ %bb.0:
; THUMB8-NEXT: .save {r7, lr}
; THUMB8-NEXT: push {r7, lr}
; THUMB8-NEXT: .save {r4, lr}
; THUMB8-NEXT: push {r4, lr}
; THUMB8-NEXT: movw r3, #65535
; THUMB8-NEXT: movw lr, #0
; THUMB8-NEXT: lsl.w r12, r3, r2
; THUMB8-NEXT: rsb.w r3, r2, #32
; THUMB8-NEXT: movt lr, #65535
; THUMB8-NEXT: lsr.w r3, lr, r3
; THUMB8-NEXT: orr.w r3, r3, r12
; THUMB8-NEXT: sub.w r12, r2, #32
; THUMB8-NEXT: cmp.w r12, #0
; THUMB8-NEXT: orr.w r12, r12, r3
; THUMB8-NEXT: sub.w r3, r2, #32
; THUMB8-NEXT: lsl.w r2, lr, r2
; THUMB8-NEXT: it ge
; THUMB8-NEXT: lslge.w r3, lr, r12
; THUMB8-NEXT: cmp r3, #0
; THUMB8-NEXT: lsl.w r4, lr, r3
; THUMB8-NEXT: it lt
; THUMB8-NEXT: movlt r4, r12
; THUMB8-NEXT: it ge
; THUMB8-NEXT: movge r2, #0
; THUMB8-NEXT: ands r1, r3
; THUMB8-NEXT: ands r1, r4
; THUMB8-NEXT: ands r0, r2
; THUMB8-NEXT: orrs r0, r1
; THUMB8-NEXT: clz r0, r0
; THUMB8-NEXT: lsrs r0, r0, #5
; THUMB8-NEXT: pop {r7, pc}
; THUMB8-NEXT: pop {r4, pc}
%t0 = shl i64 281474976645120, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0

View File

@ -0,0 +1,32 @@
; RUN: llc < %s -mtriple=thumbv7 -o - | llvm-mc -triple thumbv7 --show-encoding 2>&1 | FileCheck %s --check-prefix=V7
; RUN: llc < %s -mtriple=thumbv7 -arm-restrict-it -o - | llvm-mc -triple thumbv7 --show-encoding 2>&1 | FileCheck %s --check-prefix=V7_RESTRICT_IT
; RUN: llc < %s -mtriple=thumbv8 -o - | llvm-mc -triple thumbv8 --show-encoding 2>&1 | FileCheck %s --check-prefix=V8
; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it -o - | llvm-mc -triple thumbv8 --show-encoding 2>&1 | FileCheck %s --check-prefix=V8_NO_RESTRICT_IT
; V7-NOT: warning
; V7_RESTRICT_IT-NOT: warning
; V8-NOT: warning
; V8_NO_RESTRICT_IT: warning: deprecated instruction in IT block
; it ge @ encoding: [0xa8,0xbf]
; lslge.w r3, r12, lr @ encoding: [0x0c,0xfa,0x0e,0xf3] ; deprecated in ARMv8 thumb mode
define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) {
%t0 = shl i64 1, %y
%t1 = and i64 %t0, %x
%res = icmp eq i64 %t1, 0
ret i1 %res
}
; V7-NOT: warning
; V7_RESTRICT_IT-NOT: warning
; V8-NOT: warning
; V8_NO_RESTRICT_IT: warning: deprecated instruction in IT block
; it ne @ encoding: [0x18,0xbf]
; movne.w r0, #-1 @ encoding: [0x4f,0xf0,0xff,0x30] ; deprecated in ARMv8 thumb mode
define i32 @icmp_eq_minus_one(i8* %ptr) {
%load = load i8, i8* %ptr, align 1
%conv = zext i8 %load to i32
%cmp = icmp eq i8 %load, -1
%ret = select i1 %cmp, i32 %conv, i32 -1
ret i32 %ret
}