mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-02 00:42:52 +01:00
235913be77
be able to handle *ANY* alloca that is poked by loads and stores of bitcasts and GEPs with constant offsets. Previously, the code had a number of annoying limitations that caused it to miss cases such as storing into holes in structs and complex casts (as in bitfield-sroa) where we had unions of bitfields etc. This also handles a number of important cases that are exposed due to the ABI lowering stuff we do to pass stuff by value. One case that is pretty great is that we compile 2006-11-07-InvalidArrayPromote.ll into: define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind { %tmp10 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %v1) %tmp105 = bitcast <4 x i32> %tmp10 to i128 %tmp1056 = zext i128 %tmp105 to i256 %tmp.upgrd.43 = lshr i256 %tmp1056, 96 %tmp.upgrd.44 = trunc i256 %tmp.upgrd.43 to i32 ret i32 %tmp.upgrd.44 } which turns into: _func: subl $28, %esp cvttps2dq %xmm1, %xmm0 movaps %xmm0, (%esp) movl 12(%esp), %eax addl $28, %esp ret Which is pretty good code all things considered :). One effect of this is that SROA will start generating arbitrary bitwidth integers that are a multiple of 8 bits. In the case above, we got a 256-bit integer, but the codegen guys assure me that it can handle the simple and/or/shift/zext stuff that we're doing on these operations. This addresses rdar://6532315 llvm-svn: 63469
20 lines
1.0 KiB
LLVM
20 lines
1.0 KiB
LLVM
; Test driver: assemble this file, run the scalarrepl pass over it, disassemble
; the result, and succeed only if the word "alloca" no longer appears in the
; output (the leading `not` inverts grep's exit status).
; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | not grep alloca
|
|
|
|
; @func: stores two converted <4 x i32> vectors into a two-element stack array
; and returns a single i32 read back from that array through a GEP whose last
; index is out of range for the vector type. The test driver at the top of the
; file expects the stack slot to be eliminated after optimization.
define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind {
|
|
; Stack slot under test: an array of two <4 x i32> vectors, 16-byte aligned.
%vsiidx = alloca [2 x <4 x i32>], align 16 ; <[2 x <4 x i32>]*> [#uses=3]
|
|
; Convert %v0 via the cvttps2dq intrinsic declared at the bottom of the file.
%tmp = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v0 ) ; <<4 x i32>> [#uses=2]
|
|
; Dead value: this bitcast has no users in the function.
%tmp.upgrd.1 = bitcast <4 x i32> %tmp to <2 x i64> ; <<2 x i64>> [#uses=0]
|
|
; Address of array element 0, which receives the first converted vector.
%tmp.upgrd.2 = getelementptr [2 x <4 x i32>]* %vsiidx, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
|
|
store <4 x i32> %tmp, <4 x i32>* %tmp.upgrd.2
|
|
; Convert %v1 the same way.
%tmp10 = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v1 ) ; <<4 x i32>> [#uses=2]
|
|
; Dead value: this bitcast has no users in the function.
%tmp10.upgrd.3 = bitcast <4 x i32> %tmp10 to <2 x i64> ; <<2 x i64>> [#uses=0]
|
|
; Address of array element 1, which receives the second converted vector.
%tmp14 = getelementptr [2 x <4 x i32>]* %vsiidx, i32 0, i32 1 ; <<4 x i32>*> [#uses=1]
|
|
store <4 x i32> %tmp10, <4 x i32>* %tmp14
|
|
; The final index (4) is past the last lane of a <4 x i32> (valid lanes are
; 0..3), so this address steps outside array element 0.
; NOTE(review): presumably this is the "invalid" access the test exists to
; exercise, landing inside array element 1 -- confirm against the pass.
%tmp15 = getelementptr [2 x <4 x i32>]* %vsiidx, i32 0, i32 0, i32 4 ; <i32*> [#uses=1]
|
|
; Read one i32 back through the out-of-range address and return it.
%tmp.upgrd.4 = load i32* %tmp15 ; <i32> [#uses=1]
|
|
ret i32 %tmp.upgrd.4
|
|
}
|
|
|
|
; External declaration of the x86 SSE2 intrinsic called twice by @func; it
; takes a <4 x float> and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
|
|
|