mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
eeeb060307
Given the following C code llvm currently generates suboptimal code for x86-64: __m128 bss4( const __m128 *ptr, size_t i, size_t j ) { float f = ptr[i][j]; return (__m128) { f, f, f, f }; } ================================================= define <4 x float> @_Z4bss4PKDv4_fmm(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) #0 { %a1 = getelementptr inbounds <4 x float>* %ptr, i64 %i %a2 = load <4 x float>* %a1, align 16, !tbaa !1 %a3 = trunc i64 %j to i32 %a4 = extractelement <4 x float> %a2, i32 %a3 %a5 = insertelement <4 x float> undef, float %a4, i32 0 %a6 = insertelement <4 x float> %a5, float %a4, i32 1 %a7 = insertelement <4 x float> %a6, float %a4, i32 2 %a8 = insertelement <4 x float> %a7, float %a4, i32 3 ret <4 x float> %a8 } ================================================= shlq $4, %rsi addq %rdi, %rsi movslq %edx, %rax vbroadcastss (%rsi,%rax,4), %xmm0 retq ================================================= The movslq is uneeded, but is present because of the trunc to i32 and then sext back to i64 that the backend adds for vbroadcastss. We can't remove it because it changes the meaning. The IR that clang generates is already suboptimal. What clang really should emit is: %a4 = extractelement <4 x float> %a2, i64 %j This patch makes that legal. A separate patch will teach clang to do it. Differential Revision: http://reviews.llvm.org/D3519 llvm-svn: 207801
27 lines
1023 B
LLVM
27 lines
1023 B
LLVM
; RUN: llvm-as < %s | llvm-dis > %t1.ll
|
|
; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
|
|
; RUN: diff %t1.ll %t2.ll
|
|
|
|
define i32 @test_extractelement(<4 x i32> %V) {
|
|
%R = extractelement <4 x i32> %V, i32 1 ; <i32> [#uses=1]
|
|
%S = extractelement <4 x i32> %V, i64 1 ; <i32> [#uses=0]
|
|
ret i32 %R
|
|
}
|
|
|
|
define <4 x i32> @test_insertelement(<4 x i32> %V) {
|
|
%R = insertelement <4 x i32> %V, i32 0, i32 0 ; <<4 x i32>> [#uses=1]
|
|
%S = insertelement <4 x i32> %V, i32 0, i64 0 ; <<4 x i32>> [#uses=0]
|
|
ret <4 x i32> %R
|
|
}
|
|
|
|
define <4 x i32> @test_shufflevector_u(<4 x i32> %V) {
|
|
%R = shufflevector <4 x i32> %V, <4 x i32> %V, <4 x i32> < i32 1, i32 undef, i32 7, i32 2 > ; <<4 x i32>> [#uses=1]
|
|
ret <4 x i32> %R
|
|
}
|
|
|
|
define <4 x float> @test_shufflevector_f(<4 x float> %V) {
|
|
%R = shufflevector <4 x float> %V, <4 x float> undef, <4 x i32> < i32 1, i32 undef, i32 7, i32 2 > ; <<4 x float>> [#uses=1]
|
|
ret <4 x float> %R
|
|
}
|
|
|