mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[X86] Fold (setcc (cmp (atomic_load_add x, -C) C), COND) to (setcc (LADD x, -C), COND) (PR31367)
atomic_load_add returns the value before addition, but sets EFLAGS based on the
result of the addition. That means it's setting the flags based on effectively
subtracting C from the value at x, which is also what the outer cmp does.

This targets a pattern that occurs frequently with reference counting pointers:

  void decrement(long volatile *ptr) {
    if (_InterlockedDecrement(ptr) == 0)
      release();
  }

Clang would previously compile it (for 32-bit at -Os) as:

  00000000 <?decrement@@YAXPCJ@Z>:
         0: 8b 44 24 04         mov    0x4(%esp),%eax
         4: 31 c9               xor    %ecx,%ecx
         6: 49                  dec    %ecx
         7: f0 0f c1 08         lock xadd %ecx,(%eax)
         b: 83 f9 01            cmp    $0x1,%ecx
         e: 0f 84 00 00 00 00   je     14 <?decrement@@YAXPCJ@Z+0x14>
        14: c3                  ret

and with this patch it becomes:

  00000000 <?decrement@@YAXPCJ@Z>:
         0: 8b 44 24 04         mov    0x4(%esp),%eax
         4: f0 ff 08            lock decl (%eax)
         7: 0f 84 00 00 00 00   je     d <?decrement@@YAXPCJ@Z+0xd>
         d: c3                  ret

(Equivalent variants with _InterlockedExchangeAdd, std::atomic<>'s fetch_add
or pre-decrement operator generate the same code.)

Differential Revision: https://reviews.llvm.org/D27781

llvm-svn: 289955
This commit is contained in:
parent
92b32384c1
commit
4441124e06
@@ -28879,11 +28879,19 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 }
-/// Combine:
+/// Combine brcond/cmov/setcc/.. based on comparing the result of
+/// atomic_load_add to use EFLAGS produced by the addition
+/// directly if possible. For example:
+///
+///   (setcc (cmp (atomic_load_add x, -C) C), COND_E)
+/// becomes:
+///   (setcc (LADD x, -C), COND_E)
+///
+/// and
 ///   (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
-/// to:
+/// becomes:
 ///   (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
 /// i.e., reusing the EFLAGS produced by the LOCKed instruction.
 ///
 /// Note that this is only legal for some op/cc combinations.
 static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
                                        SelectionDAG &DAG) {
||||
@@ -28892,7 +28900,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
     return SDValue();
 
-  // This only applies to variations of the common case:
+  // This applies to variations of the common case:
   //   (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
   //   (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
   //   (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
||||
@@ -28911,8 +28919,9 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
     return SDValue();
 
   auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
-  if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
+  if (!CmpRHSC)
     return SDValue();
+  APInt Comparand = CmpRHSC->getAPIntValue();
 
   const unsigned Opc = CmpLHS.getOpcode();
 
||||
@@ -28928,13 +28937,15 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
   if (Opc == ISD::ATOMIC_LOAD_SUB)
     Addend = -Addend;
 
-  if (CC == X86::COND_S && Addend == 1)
+  if (Comparand == -Addend)
+    CC = CC; // No change.
+  else if (CC == X86::COND_S && Comparand == 0 && Addend == 1)
     CC = X86::COND_LE;
-  else if (CC == X86::COND_NS && Addend == 1)
+  else if (CC == X86::COND_NS && Comparand == 0 && Addend == 1)
     CC = X86::COND_G;
-  else if (CC == X86::COND_G && Addend == -1)
+  else if (CC == X86::COND_G && Comparand == 0 && Addend == -1)
     CC = X86::COND_GE;
-  else if (CC == X86::COND_LE && Addend == -1)
+  else if (CC == X86::COND_LE && Comparand == 0 && Addend == -1)
     CC = X86::COND_L;
   else
     return SDValue();
|
@@ -176,4 +176,45 @@ entry:
   ret i8 %tmp2
 }
 
+define i8 @test_sub_1_setcc_eq(i64* %p) #0 {
+; CHECK-LABEL: test_sub_1_setcc_eq:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    lock decq (%rdi)
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+entry:
+  %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
+  %tmp1 = icmp eq i64 %tmp0, 1
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_add_5_setcc_ne(i64* %p) #0 {
+; CHECK-LABEL: test_add_5_setcc_ne:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    lock addq $5, (%rdi)
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+entry:
+  %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
+  %tmp1 = icmp ne i64 %tmp0, -5
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
+define i8 @test_add_5_setcc_ne_comparand_mismatch(i64* %p) #0 {
+; CHECK-LABEL: test_add_5_setcc_ne_comparand_mismatch:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl $5, %eax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    testq %rax, %rax
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    retq
+entry:
+  %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
+  %tmp1 = icmp ne i64 %tmp0, 0
+  %tmp2 = zext i1 %tmp1 to i8
+  ret i8 %tmp2
+}
+
 attributes #0 = { nounwind }
Loading…
x
Reference in New Issue
Block a user