mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-29 23:12:55 +01:00
3bb84c9bcc
The C and C++ semantics for compare_exchange require it to return a bool indicating success. This gets mapped to LLVM IR which follows each cmpxchg with an icmp of the value loaded against the desired value. When lowered to ldxr/stxr loops, this extra comparison is redundant: its results are implicit in the control-flow of the function. This commit makes two changes: it replaces that icmp with appropriate PHI nodes, and then makes sure earlyCSE is called after expansion to actually make use of the opportunities revealed. I've also added -{arm,aarch64}-enable-atomic-tidy options, so that existing fragile tests aren't perturbed too much by the change. Many of them either rely on undef/unreachable too pervasively to be restored to something well-defined (particularly while making sure they test the same obscure assert from many years ago), or depend on a particular CFG shape, which is disrupted by SimplifyCFG. rdar://problem/16227836 llvm-svn: 209883
22 lines
833 B
LLVM
22 lines
833 B
LLVM
; RUN: llc -mcpu=cortex-a8 -arm-atomic-cfg-tidy=0 < %s | FileCheck %s
;
; Codegen test: compiles @foo for Cortex-A8 and verifies the NEON load that
; llc emits for the i16 load / insertelement sequence in the loop body.
;
; -arm-atomic-cfg-tidy=0 switches off the CFG clean-up that runs after atomic
; expansion. This test branches on undef throughout, so that clean-up would
; presumably reshape its control flow -- confirm before dropping the flag.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "armv7-eabi"

; @foo -- minimal loop whose body loads one i16, places it in lane 2 of an
; otherwise-undef <8 x i16>, squares the vector, and stores lane 2 back.
;
; Both parameters are unused; every pointer and branch condition is undef, so
; only the shape of the code matters, not its run-time behaviour. Keep the
; instructions exactly as they are: the expected output depends on them.
define arm_aapcs_vfpcc void @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind {
entry:
  br i1 undef, label %return, label %bb

bb:                                               ; preds = %bb, %entry
; The scalar load below is expected to be selected as the all-lanes form of
; vld1.16 (note the empty lane brackets on d16 and d17), presumably because
; every lane other than lane 2 is undef.
; CHECK: vld1.16 {d16[], d17[]}
  %0 = load i16* undef, align 2
  ; Lane 2 receives the loaded value; lane 3 is explicitly set to undef.
  %1 = insertelement <8 x i16> undef, i16 %0, i32 2
  %2 = insertelement <8 x i16> %1, i16 undef, i32 3
  ; Element-wise square; only lane 2 of the product is consumed.
  %3 = mul <8 x i16> %2, %2
  %4 = extractelement <8 x i16> %3, i32 2
  store i16 %4, i16* undef, align 2
  br i1 undef, label %return, label %bb

return:                                           ; preds = %bb, %entry
  ret void
}