mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
cb60eaba94
This is a resubmit of r182877, which was reverted because it broke MCJIT tests on ARM. The patch leaves MCJIT on ARM as it was before: only enabled for iOS. I've CC'ed people from the original review and revert.

FastISel was previously only enabled for iOS ARM and Thumb2; this patch enables it for ARM (not Thumb2) on Linux and NaCl, but not for MCJIT. Thumb2 support needs a bit more work, mainly around register class restrictions.

The patch punts to SelectionDAG when doing TLS relocation on non-Darwin targets. I will fix this and other FastISel-to-SelectionDAG failures in a separate patch.

The patch also forces FastISel to retain frame pointers: iOS always keeps them for backtracing (so emitted code won't change because of this), but Linux was getting much worse code that was incorrect when using big frames (such as test-suite's lencod). I'll also fix this in a later patch; it will probably require a peephole so that FastISel doesn't rematerialize frame pointers back-to-back.

The test changes are straightforward, similar to:
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20130513/174279.html
They also add a vararg test that got dropped in that change.

I ran all of the LNT test-suite on A15 hardware with --optimize-option=-O0 and all the tests pass. All the tests also pass on x86 with make check-all. I also re-ran the check-all tests that previously failed on ARM, and they all seem to pass.

llvm-svn: 183966
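To illustrate the TLS case that still falls back to SelectionDAG on non-Darwin targets, here is a minimal sketch in the same era's IR syntax (hypothetical names, not part of this commit's tests): a simple load from a thread-local global, which FastISel punts on rather than lowering itself.

; Hypothetical example: thread-local access that ARM FastISel hands off to
; SelectionDAG on non-Darwin targets (e.g. -mtriple=armv7-linux-gnueabi at -O0).
@tls_counter = thread_local global i32 0, align 4

define i32 @read_tls() nounwind {
entry:
  %val = load i32* @tls_counter, align 4
  ret i32 %val
}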
60 lines
2.3 KiB
LLVM
; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-apple-darwin < %s
; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-linux-gnueabi < %s
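; Exercises ARM fast-isel at -O0 on both Darwin and Linux triples: vector
; allocas with explicit alignment, vector loads/stores, a sitofp conversion
; through bitcasts, and pointer arithmetic via getelementptr, with the
; machine verifier enabled.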

define i32 @main() nounwind ssp {
entry:
  %retval = alloca i32, align 4
  %X = alloca <4 x i32>, align 16
  %Y = alloca <4 x float>, align 16
  store i32 0, i32* %retval
  %tmp = load <4 x i32>* %X, align 16
  call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y)
  %0 = load i32* %retval
  ret i32 %0
}

define internal void @__aa(<4 x i32> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
entry:
  %__a.addr.i = alloca <4 x i32>, align 16
  %v.addr = alloca <4 x i32>, align 16
  %p.addr = alloca i8*, align 4
  %offset.addr = alloca i32, align 4
  %constants.addr = alloca <4 x float>*, align 4
  store <4 x i32> %v, <4 x i32>* %v.addr, align 16
  store i8* %p, i8** %p.addr, align 4
  store i32 %offset, i32* %offset.addr, align 4
  store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
  %tmp = load <4 x i32>* %v.addr, align 16
  store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16
  %tmp.i = load <4 x i32>* %__a.addr.i, align 16
  %0 = bitcast <4 x i32> %tmp.i to <16 x i8>
  %1 = bitcast <16 x i8> %0 to <4 x i32>
  %vcvt.i = sitofp <4 x i32> %1 to <4 x float>
  %tmp1 = load i8** %p.addr, align 4
  %tmp2 = load i32* %offset.addr, align 4
  %tmp3 = load <4 x float>** %constants.addr, align 4
  call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3)
  ret void
}

define internal void @__bb(<4 x float> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp {
entry:
  %v.addr = alloca <4 x float>, align 16
  %p.addr = alloca i8*, align 4
  %offset.addr = alloca i32, align 4
  %constants.addr = alloca <4 x float>*, align 4
  %data = alloca i64, align 4
  store <4 x float> %v, <4 x float>* %v.addr, align 16
  store i8* %p, i8** %p.addr, align 4
  store i32 %offset, i32* %offset.addr, align 4
  store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
  %tmp = load i64* %data, align 4
  %tmp1 = load i8** %p.addr, align 4
  %tmp2 = load i32* %offset.addr, align 4
  %add.ptr = getelementptr i8* %tmp1, i32 %tmp2
  %0 = bitcast i8* %add.ptr to i64*
  %arrayidx = getelementptr inbounds i64* %0, i32 0
  store i64 %tmp, i64* %arrayidx
  ret void
}