1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[AArch64] Fix comparison peephole opt with non-0/1 immediate (PR51476)

This is a non-intrusive fix for
https://bugs.llvm.org/show_bug.cgi?id=51476 intended for backport
to the 13.x release branch. It expands on the current hack by
distinguishing between CmpValue of 0, 1 and 2, where 0 and 1 have
the obvious meaning and 2 means "anything else". The new optimization
from D98564 should only be performed for CmpValue of 0 or 1.

For main, I think we should switch the analyzeCompare() and
optimizeCompare() APIs to use int64_t instead of int, which is in
line with MachineOperand's notion of an immediate, and avoids this
problem altogether.

Differential Revision: https://reviews.llvm.org/D108076

(cherry picked from commit 81b106584f2baf33e09be2362c35c1bf2f6bfe94)
This commit is contained in:
Nikita Popov 2021-08-14 23:35:27 +02:00 committed by Tom Stellard
parent 45d26b8826
commit e4e6f3eeff
3 changed files with 93 additions and 15 deletions

View File

@ -1120,6 +1120,16 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
if (!MI.getOperand(1).isReg()) if (!MI.getOperand(1).isReg())
return false; return false;
auto NormalizeCmpValue = [](int64_t Value) -> int {
// Comparison immediates may be 64-bit, but CmpValue is only an int.
// Normalize to 0/1/2 return value, where 2 indicates any value apart from
// 0 or 1.
// TODO: Switch CmpValue to int64_t in the API to avoid this.
if (Value == 0 || Value == 1)
return Value;
return 2;
};
switch (MI.getOpcode()) { switch (MI.getOpcode()) {
default: default:
break; break;
@ -1155,8 +1165,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg(); SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0; SrcReg2 = 0;
CmpMask = ~0; CmpMask = ~0;
// FIXME: In order to convert CmpValue to 0 or 1 CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm());
CmpValue = MI.getOperand(2).getImm() != 0;
return true; return true;
case AArch64::ANDSWri: case AArch64::ANDSWri:
case AArch64::ANDSXri: case AArch64::ANDSXri:
@ -1165,14 +1174,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg(); SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0; SrcReg2 = 0;
CmpMask = ~0; CmpMask = ~0;
// FIXME:The return val type of decodeLogicalImmediate is uint64_t, CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate(
// while the type of CmpValue is int. When converting uint64_t to int,
// the high 32 bits of uint64_t will be lost.
// In fact it causes a bug in spec2006-483.xalancbmk
// CmpValue is only used to compare with zero in OptimizeCompareInstr
CmpValue = AArch64_AM::decodeLogicalImmediate(
MI.getOperand(2).getImm(), MI.getOperand(2).getImm(),
MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0; MI.getOpcode() == AArch64::ANDSWri ? 32 : 64));
return true; return true;
} }
@ -1462,10 +1466,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (CmpInstr.getOpcode() == AArch64::PTEST_PP) if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI); return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
// Continue only if we have a "ri" where immediate is zero. // Warning: CmpValue == 2 indicates *any* value apart from 0 or 1.
// FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) &&
// function. "CmpValue must be 0, 1, or 2!");
assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
if (SrcReg2 != 0) if (SrcReg2 != 0)
return false; return false;
@ -1473,9 +1476,10 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg())) if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
return false; return false;
if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI)) if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
return true; return true;
return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI); return (CmpValue == 0 || CmpValue == 1) &&
removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
} }
/// Get opcode of S version of Instr. /// Get opcode of S version of Instr.

View File

@ -307,3 +307,42 @@ body: |
RET_ReallyLR RET_ReallyLR
... ...
---
name: subswr_wrong_cmp_value
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: subswr_wrong_cmp_value
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: liveins: $x1
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
; CHECK: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr killed [[DEF]], [[COPY]], implicit-def $nzcv
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri killed [[CSINCWr]], 3, 0, implicit-def $nzcv
; CHECK: Bcc 1, %bb.2, implicit $nzcv
; CHECK: B %bb.1
; CHECK: bb.1:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: B %bb.2
; CHECK: bb.2:
; CHECK: RET_ReallyLR
bb.0:
liveins: $x1
successors: %bb.1(0x40000000), %bb.2(0x40000000)
%1:gpr64common = COPY $x1
%2:gpr64 = IMPLICIT_DEF
%3:gpr64 = SUBSXrr killed %2:gpr64, %1:gpr64common, implicit-def $nzcv
%4:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
%5:gpr32 = SUBSWri killed %4:gpr32common, 3, 0, implicit-def $nzcv
Bcc 1, %bb.2, implicit $nzcv
B %bb.1
bb.1:
successors: %bb.2(0x80000000)
B %bb.2
bb.2:
RET_ReallyLR
...

View File

@ -0,0 +1,35 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
define void @test(i8 %arg) nounwind {
; CHECK-LABEL: test:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: and w8, w0, #0xff
; CHECK-NEXT: cmp w8, #1
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: cmp w0, #3
; CHECK-NEXT: strb w0, [sp, #12]
; CHECK-NEXT: b.eq .LBB0_2
; CHECK-NEXT: // %bb.1: // %do_call
; CHECK-NEXT: bl unknown
; CHECK-NEXT: .LBB0_2: // %common.ret
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%tmp = alloca i8
%cmp1 = icmp ne i8 %arg, 1
%zext = zext i1 %cmp1 to i8
store i8 %zext, i8* %tmp
%zext2 = load i8, i8* %tmp
%cmp2 = icmp eq i8 %zext2, 3
br i1 %cmp2, label %exit, label %do_call
do_call:
call void @unknown(i8 %zext2)
ret void
exit:
ret void
}
declare void @unknown(i8)