mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[AArch64] Peephole optimization: merge AND and TST instructions
In some cases Clang does not merge the AND and TST (a.k.a. ANDS xzr) instructions. Example: the sequence "tst x2, x1" followed by "and x3, x2, x1" should become the single instruction "ands x3, x2, x1". This patch adds such merging during instruction selection: when an AND is replaced with an ANDS instruction in LowerSELECT_CC, all users of the AND are also changed to use this ANDS instruction. Short discussion on the mailing list: http://llvm.1065342.n5.nabble.com/llvm-dev-ARM-Peephole-optimization-instructions-tst-add-tp133109.html Patch by Pavel Kosov. Differential Revision: https://reviews.llvm.org/D71701
This commit is contained in:
parent
7f3f3aa402
commit
ff9f01c06e
@ -2702,7 +2702,8 @@ bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
|
||||
// bits that are implicitly ANDed off by the above opcodes and if so, skip
|
||||
// the AND.
|
||||
uint64_t MaskImm;
|
||||
if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm))
|
||||
if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
|
||||
!isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
|
||||
return false;
|
||||
|
||||
if (countTrailingOnes(MaskImm) < Bits)
|
||||
|
@ -1754,14 +1754,22 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
|
||||
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
|
||||
Opcode = AArch64ISD::ADDS;
|
||||
LHS = LHS.getOperand(1);
|
||||
} else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
|
||||
!isUnsignedIntSetCC(CC)) {
|
||||
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
|
||||
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
|
||||
// of the signed comparisons.
|
||||
Opcode = AArch64ISD::ANDS;
|
||||
RHS = LHS.getOperand(1);
|
||||
LHS = LHS.getOperand(0);
|
||||
} else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
|
||||
if (LHS.getOpcode() == ISD::AND) {
|
||||
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
|
||||
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
|
||||
// of the signed comparisons.
|
||||
const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
|
||||
DAG.getVTList(VT, MVT_CC),
|
||||
LHS.getOperand(0),
|
||||
LHS.getOperand(1));
|
||||
// Replace all users of (and X, Y) with newly generated (ands X, Y)
|
||||
DAG.ReplaceAllUsesWith(LHS, ANDSNode);
|
||||
return ANDSNode.getValue(1);
|
||||
} else if (LHS.getOpcode() == AArch64ISD::ANDS) {
|
||||
// Use result of ANDS
|
||||
return LHS.getValue(1);
|
||||
}
|
||||
}
|
||||
|
||||
return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
|
||||
|
@ -18,12 +18,11 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
|
||||
define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
|
||||
; CHECK-LABEL: fshl_i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: and w9, w2, #0x1f
|
||||
; CHECK-NEXT: ands w9, w2, #0x1f
|
||||
; CHECK-NEXT: neg w9, w9
|
||||
; CHECK-NEXT: lsl w8, w0, w2
|
||||
; CHECK-NEXT: lsr w9, w1, w9
|
||||
; CHECK-NEXT: orr w8, w8, w9
|
||||
; CHECK-NEXT: tst w2, #0x1f
|
||||
; CHECK-NEXT: csel w0, w0, w8, eq
|
||||
; CHECK-NEXT: ret
|
||||
%f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
|
||||
@ -146,12 +145,11 @@ define i8 @fshl_i8_const_fold() {
|
||||
define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
|
||||
; CHECK-LABEL: fshr_i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: and w9, w2, #0x1f
|
||||
; CHECK-NEXT: ands w9, w2, #0x1f
|
||||
; CHECK-NEXT: neg w9, w9
|
||||
; CHECK-NEXT: lsr w8, w1, w2
|
||||
; CHECK-NEXT: lsl w9, w0, w9
|
||||
; CHECK-NEXT: orr w8, w9, w8
|
||||
; CHECK-NEXT: tst w2, #0x1f
|
||||
; CHECK-NEXT: csel w0, w1, w8, eq
|
||||
; CHECK-NEXT: ret
|
||||
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
|
||||
|
81
test/CodeGen/AArch64/peephole-and-tst.ll
Normal file
81
test/CodeGen/AArch64/peephole-and-tst.ll
Normal file
@ -0,0 +1,81 @@
|
||||
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
|
||||
|
||||
%struct.anon = type { i32*, i32* }
|
||||
|
||||
@ptr_wrapper = common dso_local local_unnamed_addr global %struct.anon* null, align 8
|
||||
|
||||
define dso_local i32 @test_func_i32_two_uses(i32 %in, i32 %bit, i32 %mask) local_unnamed_addr {
|
||||
entry:
|
||||
%0 = load %struct.anon*, %struct.anon** @ptr_wrapper, align 8
|
||||
%result = getelementptr inbounds %struct.anon, %struct.anon* %0, i64 0, i32 1
|
||||
%tobool2 = icmp ne i32 %mask, 0
|
||||
br label %do.body
|
||||
|
||||
do.body: ; preds = %4, %entry
|
||||
; CHECK-LABEL: test_func_i32_two_uses:
|
||||
; CHECK: ands [[DSTREG:w[0-9]+]]
|
||||
; Usage #1
|
||||
; CHECK: cmp [[DSTREG]]
|
||||
; Usage #2
|
||||
; CHECK: cbz [[DSTREG]]
|
||||
%bit.addr.0 = phi i32 [ %bit, %entry ], [ %shl, %4 ]
|
||||
%retval1.0 = phi i32 [ 0, %entry ], [ %retval1.1, %4 ]
|
||||
%and = and i32 %bit.addr.0, %in
|
||||
%tobool = icmp eq i32 %and, 0
|
||||
%not.tobool = xor i1 %tobool, true
|
||||
%inc = zext i1 %not.tobool to i32
|
||||
%retval1.1 = add nuw nsw i32 %retval1.0, %inc
|
||||
%1 = xor i1 %tobool, true
|
||||
%2 = or i1 %tobool2, %1
|
||||
%dummy = and i32 %mask, %in
|
||||
%use_and = icmp eq i32 %and, %dummy
|
||||
%dummy_or = or i1 %use_and, %2
|
||||
br i1 %dummy_or, label %3, label %4
|
||||
|
||||
3: ; preds = %do.body
|
||||
store i32* null, i32** %result, align 8
|
||||
br label %4
|
||||
|
||||
4: ; preds = %do.body, %3
|
||||
%shl = shl i32 %bit.addr.0, 1
|
||||
%tobool6 = icmp eq i32 %shl, 0
|
||||
br i1 %tobool6, label %do.end, label %do.body
|
||||
|
||||
do.end: ; preds = %4
|
||||
ret i32 %retval1.1
|
||||
}
|
||||
|
||||
define dso_local i32 @test_func_i64_one_use(i64 %in, i64 %bit, i64 %mask) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%0 = load %struct.anon*, %struct.anon** @ptr_wrapper, align 8
|
||||
%result = getelementptr inbounds %struct.anon, %struct.anon* %0, i64 0, i32 1
|
||||
%tobool2 = icmp ne i64 %mask, 0
|
||||
br label %do.body
|
||||
|
||||
do.body: ; preds = %4, %entry
|
||||
; CHECK-LABEL: test_func_i64_one_use:
|
||||
; CHECK: ands [[DSTREG:x[0-9]+]], [[SRCREG1:x[0-9]+]], [[SRCREG2:x[0-9]+]]
|
||||
; CHECK-NEXT: orr [[DSTREG]], [[SRCREG_ORR:x[0-9]+]], [[DSTREG]]
|
||||
%bit.addr.0 = phi i64 [ %bit, %entry ], [ %shl, %4 ]
|
||||
%retval1.0 = phi i32 [ 0, %entry ], [ %retval1.1, %4 ]
|
||||
%and = and i64 %bit.addr.0, %in
|
||||
%tobool = icmp eq i64 %and, 0
|
||||
%not.tobool = xor i1 %tobool, true
|
||||
%inc = zext i1 %not.tobool to i32
|
||||
%retval1.1 = add nuw nsw i32 %retval1.0, %inc
|
||||
%1 = xor i1 %tobool, true
|
||||
%2 = or i1 %tobool2, %1
|
||||
br i1 %2, label %3, label %4
|
||||
|
||||
3: ; preds = %do.body
|
||||
store i32* null, i32** %result, align 8
|
||||
br label %4
|
||||
|
||||
4: ; preds = %do.body, %3
|
||||
%shl = shl i64 %bit.addr.0, 1
|
||||
%tobool6 = icmp eq i64 %shl, 0
|
||||
br i1 %tobool6, label %do.end, label %do.body
|
||||
|
||||
do.end: ; preds = %4
|
||||
ret i32 %retval1.1
|
||||
}
|
@ -80,12 +80,11 @@ declare i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
|
||||
define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
|
||||
; CHECK-LABEL: n6_fshl:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: and w9, w2, #0x1f
|
||||
; CHECK-NEXT: ands w9, w2, #0x1f
|
||||
; CHECK-NEXT: neg w9, w9
|
||||
; CHECK-NEXT: lsl w8, w0, w2
|
||||
; CHECK-NEXT: lsr w9, w1, w9
|
||||
; CHECK-NEXT: orr w8, w8, w9
|
||||
; CHECK-NEXT: tst w2, #0x1f
|
||||
; CHECK-NEXT: csel w0, w0, w8, eq
|
||||
; CHECK-NEXT: ret
|
||||
%shamt_wide = sext i8 %shamt to i32
|
||||
@ -95,12 +94,11 @@ define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
|
||||
define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
|
||||
; CHECK-LABEL: n7_fshr:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: and w9, w2, #0x1f
|
||||
; CHECK-NEXT: ands w9, w2, #0x1f
|
||||
; CHECK-NEXT: neg w9, w9
|
||||
; CHECK-NEXT: lsr w8, w1, w2
|
||||
; CHECK-NEXT: lsl w9, w0, w9
|
||||
; CHECK-NEXT: orr w8, w9, w8
|
||||
; CHECK-NEXT: tst w2, #0x1f
|
||||
; CHECK-NEXT: csel w0, w1, w8, eq
|
||||
; CHECK-NEXT: ret
|
||||
%shamt_wide = sext i8 %shamt to i32
|
||||
|
Loading…
x
Reference in New Issue
Block a user