2012-03-13 23:00:52 +01:00
|
|
|
; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
|
|
|
|
; rdar://11035895
|
|
|
|
|
|
|
|
; DAG combine incorrectly optimizes (i32 vextract (v4i16 load $addr), c) to
|
|
|
|
; (i16 load $addr+c*sizeof(i16)). It should have issued an extload instead, i.e.
|
|
|
|
; (i32 extload $addr+c*sizeof(i16))
|
|
|
|
; test_hi_short3: loads a <3 x i16> vector from %srcA, builds a <2 x i16>
; whose lane 0 is element 2 of the loaded vector (lane 1 is undef), and
; stores that 2-element vector to %dst. Returns nothing.
define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind {
|
|
|
|
entry:
|
When performing a truncating store, it's possible to rearrange the data
in-register, such that we can use a single vector store rather then a
series of scalar stores.
For func_4_8 the generated code
vldr d16, LCPI0_0
vmov d17, r0, r1
vadd.i16 d16, d17, d16
vmov.u16 r0, d16[3]
strb r0, [r2, #3]
vmov.u16 r0, d16[2]
strb r0, [r2, #2]
vmov.u16 r0, d16[1]
strb r0, [r2, #1]
vmov.u16 r0, d16[0]
strb r0, [r2]
bx lr
becomes
vldr d16, LCPI0_0
vmov d17, r0, r1
vadd.i16 d16, d17, d16
vuzp.8 d16, d17
vst1.32 {d16[0]}, [r2, :32]
bx lr
I'm not fond of how this combine pessimizes 2012-03-13-DAGCombineBug.ll,
but I couldn't think of a way to judiciously apply this combine.
This
ldrh r0, [r0, #4]
strh r0, [r1]
becomes
vldr d16, [r0]
vmov.u16 r0, d16[2]
vmov.32 d16[0], r0
vuzp.16 d16, d17
vst1.32 {d16[0]}, [r1, :32]
PR11158
rdar://10703339
llvm-svn: 154340
2012-04-09 22:32:02 +02:00
|
|
|
; The FileCheck directive on the next line requires a vst1.32 instruction to
; appear in the llc output for this function.
; CHECK: vst1.32
|
2012-03-13 23:00:52 +01:00
|
|
|
; Load the full 3-element i16 vector from %srcA (declared 8-byte aligned).
%0 = load <3 x i16> * %srcA, align 8
|
|
|
|
; Shuffle mask <2, undef>: result lane 0 = element 2 of %0, lane 1 = undef.
%1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> <i32 2, i32 undef>
|
|
|
|
; Store the 2-element result to %dst (declared 4-byte aligned).
store <2 x i16> %1, <2 x i16> * %dst, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|