1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

[AArch64] Improve v8.1-A code-gen for atomic load-subtract

Armv8.1-A added an atomic load-add instruction, but not a load-subtract
instruction. Our current code-generation for atomic load-subtract always
inserts a NEG instruction to negate its argument, even if it could be
folded into a constant or another instruction.

This adds lowering early in selection DAG to convert a load-subtract
operation into a subtract and a load-add, allowing the normal DAG
optimisations to work on it.

I've left the old tablegen patterns in because they are still needed for
global isel.

Some of the tests in this patch are copied from D35375 by Chad Rosier (which
was abandoned).

Differential revision: https://reviews.llvm.org/D42477

llvm-svn: 324892
This commit is contained in:
Oliver Stannard 2018-02-12 14:22:03 +00:00
parent ff0846d7c8
commit 8dae8033b4
3 changed files with 134 additions and 0 deletions

View File

@ -464,6 +464,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
// Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
// This requires the Performance Monitors extension.
@ -2679,6 +2681,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
return LowerVECREDUCE(Op, DAG);
case ISD::ATOMIC_LOAD_SUB:
return LowerATOMIC_LOAD_SUB(Op, DAG);
}
}
@ -7373,6 +7377,23 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
}
}
/// Custom lowering hook for ISD::ATOMIC_LOAD_SUB.
///
/// When the subtarget has LSE, the node is rewritten as an
/// ISD::ATOMIC_LOAD_ADD whose value operand is (0 - original value), because
/// LSE offers an atomic load-add instruction but no load-sub. Emitting the
/// negation as an ordinary ISD::SUB node lets the usual DAG combines fold it
/// into a constant or a neighbouring instruction.
///
/// \returns the replacement ATOMIC_LOAD_ADD node, or an empty SDValue when LSE
/// is unavailable (presumably leaving the node to the default handling -
/// confirm against the legalizer).
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Nothing to do unless the LSE atomics are available.
  const auto &ST = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
  if (!ST.hasLSE())
    return SDValue();

  SDLoc DL(Op);
  MVT ValTy = Op.getSimpleValueType();
  auto *Atomic = cast<AtomicSDNode>(Op.getNode());

  // Build (0 - operand 2) as a plain SUB so it stays visible to the combiner.
  SDValue Zero = DAG.getConstant(0, DL, ValTy);
  SDValue Negated = DAG.getNode(ISD::SUB, DL, ValTy, Zero, Op.getOperand(2));

  // Re-issue the operation as a load-add, keeping the first two operands, the
  // memory type and the memory operand of the original atomic node.
  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, Atomic->getMemoryVT(),
                       Op.getOperand(0), Op.getOperand(1), Negated,
                       Atomic->getMemOperand());
}
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.

View File

@ -596,6 +596,7 @@ private:
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;

View File

@ -814,6 +814,118 @@ define void @test_atomic_load_sub_i64_noret(i64 %offset) nounwind {
ret void
}
; atomicrmw sub of the constant -1 on the i8 global @var8. The expected output
; materialises +1 with an orr and passes it to the byte-sized load-add
; (ldaddalb); no dmb barrier may appear around the sequence.
define i8 @test_atomic_load_sub_i8_neg_imm() nounwind {
; CHECK-LABEL: test_atomic_load_sub_i8_neg_imm:
%old = atomicrmw sub i8* @var8, i8 -1 seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
; CHECK: ldaddalb w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
; CHECK-NOT: dmb
ret i8 %old
}
; atomicrmw sub of the constant -1 on the i16 global @var16. The expected
; output materialises +1 with an orr and passes it to the halfword-sized
; load-add (ldaddalh); no dmb barrier may appear around the sequence.
define i16 @test_atomic_load_sub_i16_neg_imm() nounwind {
; CHECK-LABEL: test_atomic_load_sub_i16_neg_imm:
%old = atomicrmw sub i16* @var16, i16 -1 seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
; CHECK: ldaddalh w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
; CHECK-NOT: dmb
ret i16 %old
}
; atomicrmw sub of the constant -1 on the i32 global @var32. The expected
; output materialises +1 with an orr and passes it to the 32-bit load-add
; (ldaddal on w registers); no dmb barrier may appear around the sequence.
define i32 @test_atomic_load_sub_i32_neg_imm() nounwind {
; CHECK-LABEL: test_atomic_load_sub_i32_neg_imm:
%old = atomicrmw sub i32* @var32, i32 -1 seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
; CHECK: ldaddal w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]]
; CHECK-NOT: dmb
ret i32 %old
}
; atomicrmw sub of the constant -1 on the i64 global @var64. The expected
; output materialises +1 with an orr and passes it to the 64-bit load-add
; (ldaddal on x registers); no dmb barrier may appear around the sequence.
; NOTE(review): the immediate is captured from a w register but consumed as
; x[[IMM]]; presumably this relies on 32-bit register writes zero-extending
; into the full 64-bit register - confirm before "fixing" the width.
define i64 @test_atomic_load_sub_i64_neg_imm() nounwind {
; CHECK-LABEL: test_atomic_load_sub_i64_neg_imm:
%old = atomicrmw sub i64* @var64, i64 -1 seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: orr w[[IMM:[0-9]+]], wzr, #0x1
; CHECK: ldaddal x[[IMM]], x[[NEW:[0-9]+]], [x[[ADDR]]]
; CHECK-NOT: dmb
ret i64 %old
}
; atomicrmw sub on @var8 whose operand is already (0 - %offset). The two
; negations should cancel, so the expected byte-sized load-add (ldaddalb)
; takes w0 directly (presumably the register holding the incoming %offset
; argument); no dmb barrier may appear around the sequence.
define i8 @test_atomic_load_sub_i8_neg_arg(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i8_neg_arg:
%neg = sub i8 0, %offset
%old = atomicrmw sub i8* @var8, i8 %neg seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
; CHECK: ldaddalb w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
; CHECK-NOT: dmb
ret i8 %old
}
; atomicrmw sub on @var16 whose operand is already (0 - %offset). The two
; negations should cancel, so the expected halfword-sized load-add (ldaddalh)
; takes w0 directly (presumably the register holding the incoming %offset
; argument); no dmb barrier may appear around the sequence.
define i16 @test_atomic_load_sub_i16_neg_arg(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i16_neg_arg:
%neg = sub i16 0, %offset
%old = atomicrmw sub i16* @var16, i16 %neg seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
; CHECK: ldaddalh w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
; CHECK-NOT: dmb
ret i16 %old
}
; atomicrmw sub on @var32 whose operand is already (0 - %offset). The two
; negations should cancel, so the expected 32-bit load-add (ldaddal) takes w0
; directly (presumably the register holding the incoming %offset argument);
; no dmb barrier may appear around the sequence.
define i32 @test_atomic_load_sub_i32_neg_arg(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i32_neg_arg:
%neg = sub i32 0, %offset
%old = atomicrmw sub i32* @var32, i32 %neg seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
; CHECK-NOT: dmb
ret i32 %old
}
; atomicrmw sub on @var64 whose operand is already (0 - %offset). The two
; negations should cancel, so the expected 64-bit load-add (ldaddal) takes x0
; directly (presumably the register holding the incoming %offset argument);
; no dmb barrier may appear around the sequence.
define i64 @test_atomic_load_sub_i64_neg_arg(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i64_neg_arg:
%neg = sub i64 0, %offset
%old = atomicrmw sub i64* @var64, i64 %neg seq_cst
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
; CHECK-NOT: dmb
ret i64 %old
}
define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_and_i8:
%old = atomicrmw and i8* @var8, i8 %offset seq_cst