
[ARM] Make -mcpu=generic schedule for an in-order core (Cortex-A8).

The benchmarking summarized in
http://lists.llvm.org/pipermail/llvm-dev/2017-May/113525.html showed
this is beneficial for a wide range of cores.

As is to be expected, quite a few small adaptations are needed to the
regression tests, as the difference in scheduling results in:
- Quite a few small instruction schedule differences.
- A few changes in register allocation decisions caused by different
 instruction schedules.
- A few changes in IfConversion decisions, due to a difference in
 instruction schedule and/or the estimated cost of a branch mispredict.

llvm-svn: 306514
Kristof Beyls 2017-06-28 07:07:03 +00:00
parent 9e52cb840d
commit 7d64810efd
48 changed files with 406 additions and 414 deletions
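
For illustration, a hypothetical sketch (not part of this commit): after this change, "generic" and an explicit Cortex-A8 CPU share the same machine model, so for simple integer-only code like the function below the two RUN lines should produce the same schedule.

; Hypothetical test; the RUN lines and function are illustrative assumptions.
; RUN: llc -mtriple=armv7a-none-eabi -mcpu=generic -o - %s
; RUN: llc -mtriple=armv7a-none-eabi -mcpu=cortex-a8 -o - %s
define i32 @mul_add(i32 %a, i32 %b, i32 %c) {
entry:
  %mul = mul i32 %a, %b
  %add = add i32 %mul, %c
  ret i32 %add
}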

View File

@ -540,7 +540,7 @@ def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
//
// Dummy CPU, used to target architectures
def : ProcNoItin<"generic", []>;
def : ProcessorModel<"generic", CortexA8Model, []>;
def : ProcNoItin<"arm8", [ARMv4]>;
def : ProcNoItin<"arm810", [ARMv4]>;

View File

@ -24,7 +24,7 @@ entry:
; CHECK-LABEL: caller:
define void @caller() {
; CHECK: ldm r0, {r1, r2, r3}
; CHECK: ldm r{{[0-9]+}}, {r1, r2, r3}
call void @t(i32 0, %struct.s* @v);
ret void
}

View File

@ -19,9 +19,9 @@ entry:
; CHECK-LABEL: isel
; CHECK: push {r4, r5}
; CHECK: movw r4, #{{\d*}}
; CHECK: movw r12, #0
; CHECK: movt r12, #0
; CHECK: movw r4, #{{\d*}}
; CHECK: blx r12
; CHECK: sub.w sp, sp, r4

View File

@ -15,11 +15,11 @@ define i32 @f() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
@ -36,11 +36,11 @@ define i32 @e() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
@ -57,11 +57,11 @@ define i32 @d() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
@ -78,11 +78,11 @@ define i32 @c() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
@ -99,11 +99,11 @@ define i32 @b() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
@ -120,11 +120,11 @@ define i16 @a() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
@ -141,11 +141,11 @@ define i8 @Z() {
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]

View File

@ -31,8 +31,8 @@ entry:
; CHECK-LABEL: use_arg:
; CHECK: push {[[csr:[^ ]*]], lr}
; CHECK: ldr [[csr]], [sp, #8]
; CHECK: add r0, sp, #8
; CHECK: ldr [[csr]], [sp, #8]
; CHECK: bl addrof_i32
; CHECK: mov r0, [[csr]]
; CHECK: pop {[[csr]], pc}
@ -50,8 +50,8 @@ entry:
; CHECK: push {r4, r5, r11, lr}
; CHECK: sub sp, sp, #8
; CHECK: ldr r4, [sp, #28]
; CHECK: ldr r5, [sp, #24]
; CHECK: mov r0, sp
; CHECK: ldr r5, [sp, #24]
; CHECK: str r4, [sp, #4]
; CHECK: str r5, [sp]
; CHECK: bl addrof_i64

View File

@ -171,8 +171,8 @@ define i32 @test_tst_assessment(i32 %a, i32 %b) {
;
; V8-LABEL: test_tst_assessment:
; V8: @ BB#0:
; V8-NEXT: lsls r1, r1, #31
; V8-NEXT: and r0, r0, #1
; V8-NEXT: lsls r1, r1, #31
; V8-NEXT: it ne
; V8-NEXT: subne r0, #1
; V8-NEXT: bx lr

View File

@ -47,8 +47,8 @@ lab4:
; CHECK-LABEL: jump_table:
; ARM: lsl r[[R_TAB_IDX:[0-9]+]], r{{[0-9]+}}, #2
; ARM: adr r[[R_TAB_BASE:[0-9]+]], [[LJTI:\.LJTI[0-9]+_[0-9]+]]
; ARM: lsl r[[R_TAB_IDX:[0-9]+]], r{{[0-9]+}}, #2
; ARM_ABS: ldr pc, [r[[R_TAB_IDX]], r[[R_TAB_BASE]]]
; ARM_PC: ldr r[[R_OFFSET:[0-9]+]], [r[[R_TAB_IDX]], r[[R_TAB_BASE]]]
; ARM_PC: add pc, r[[R_OFFSET]], r[[R_TAB_BASE]]

View File

@ -45,15 +45,19 @@ target triple = "armv7--linux-gnueabi"
; CHECK: @ %while.cond2
; CHECK: add
; CHECK-NEXT: cmp r{{[0-1]+}}, #1
; Set the return value.
; CHECK-NEXT: moveq r0,
; CHECK-NEXT: popeq
; Jump to the return block.
; CHECK-NEXT: beq [[RETURN_BLOCK:[.a-zA-Z0-9_]+]]
;
; Use the back edge to check we get the label of the loop right.
; This is to make sure we check the right loop pattern.
; CHECK: @ %while.body24.land.rhs14_crit_edge
; CHECK: cmp r{{[0-9]+}}, #192
; CHECK-NEXT: bhs [[LOOP_HEADER]]
;
; CHECK: [[RETURN_BLOCK]]:
; Set the return value.
; CHECK-NEXT: mov r0,
; CHECK-NEXT: pop
define fastcc i8* @wrongUseOfPostDominate(i8* readonly %s, i32 %off, i8* readnone %lim) {
entry:
%cmp = icmp sgt i32 %off, -1

View File

@ -70,8 +70,8 @@ entry:
; CHECK-ARMV7-NEXT: ldrexb [[SUCCESS]], [r0]
; CHECK-ARMV7-NEXT: cmp [[SUCCESS]], r1
; CHECK-ARMV7-NEXT: beq [[HEAD]]
; CHECK-ARMV7-NEXT: clrex
; CHECK-ARMV7-NEXT: mov r0, #0
; CHECK-ARMV7-NEXT: clrex
; CHECK-ARMV7-NEXT: bx lr
; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
@ -88,6 +88,6 @@ entry:
; CHECK-THUMBV7-NEXT: ldrexb [[LD:r[0-9]+]], [r0]
; CHECK-THUMBV7-NEXT: cmp [[LD]], [[DESIRED]]
; CHECK-THUMBV7-NEXT: beq [[TRYST:.LBB[0-9_]+]]
; CHECK-THUMBV7-NEXT: clrex
; CHECK-THUMBV7-NEXT: movs r0, #0
; CHECK-THUMBV7-NEXT: clrex
; CHECK-THUMBV7-NEXT: bx lr

View File

@ -16,8 +16,8 @@ define <4 x i32> @sext_inc_vec(<4 x i1> %x) {
; CHECK: @ BB#0:
; CHECK-NEXT: vmov.i16 d16, #0x1
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vmov.i32 q9, #0x1
; CHECK-NEXT: veor d16, d17, d16
; CHECK-NEXT: vmov.i32 q9, #0x1
; CHECK-NEXT: vmovl.u16 q8, d16
; CHECK-NEXT: vand q8, q8, q9
; CHECK-NEXT: vmov r0, r1, d16
@ -31,13 +31,13 @@ define <4 x i32> @sext_inc_vec(<4 x i1> %x) {
define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmpgt_sext_inc_vec:
; CHECK: @ BB#0:
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vmov d19, r2, r3
; CHECK-NEXT: vmov.i32 q10, #0x1
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vcge.s32 q8, q8, q9
; CHECK-NEXT: vand q8, q8, q10
; CHECK-NEXT: vmov d17, r2, r3
; CHECK-NEXT: vmov d16, r0, r1
; CHECK-NEXT: mov r0, sp
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vcge.s32 q8, q9, q8
; CHECK-NEXT: vmov.i32 q9, #0x1
; CHECK-NEXT: vand q8, q8, q9
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
@ -50,13 +50,13 @@ define <4 x i32> @cmpgt_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @cmpne_sext_inc_vec(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: cmpne_sext_inc_vec:
; CHECK: @ BB#0:
; CHECK-NEXT: vmov d17, r2, r3
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vmov d19, r2, r3
; CHECK-NEXT: vmov.i32 q10, #0x1
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vceq.i32 q8, q9, q8
; CHECK-NEXT: vand q8, q8, q10
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: vmov d16, r0, r1
; CHECK-NEXT: vceq.i32 q8, q8, q9
; CHECK-NEXT: vmov.i32 q9, #0x1
; CHECK-NEXT: vand q8, q8, q9
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr

View File

@ -47,12 +47,12 @@ define i1 @test_cmpxchg_weak_to_bool(i32, i32 *%addr, i32 %desired, i32 %new) {
; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1]
; CHECK-NEXT: cmp [[SUCCESS]], #0
; CHECK-NEXT: bxne lr
; CHECK-NEXT: dmb ish
; CHECK-NEXT: mov r0, #1
; CHECK-NEXT: dmb ish
; CHECK-NEXT: bx lr
; CHECK-NEXT: [[LDFAILBB]]:
; CHECK-NEXT: clrex
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: clrex
; CHECK-NEXT: bx lr
ret i1 %success

View File

@ -38,9 +38,8 @@ entry:
br i1 %0, label %bb5, label %bb.nph15
bb1: ; preds = %bb2.preheader, %bb1
; CHECK: LBB1_[[BB3:.]]: @ %bb3
; CHECK: LBB1_[[PREHDR:.]]: @ %bb2.preheader
; CHECK: blt LBB1_[[BB3]]
; CHECK: blt LBB1_[[BB3:.]]
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
%sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
%tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1]
@ -54,7 +53,7 @@ bb1: ; preds = %bb2.preheader, %bb1
bb3: ; preds = %bb1, %bb2.preheader
; CHECK: LBB1_[[BB1:.]]: @ %bb1
; CHECK: bne LBB1_[[BB1]]
; CHECK: b LBB1_[[BB3]]
; CHECK: LBB1_[[BB3]]: @ %bb3
%sum.0.lcssa = phi i32 [ %sum.110, %bb2.preheader ], [ %2, %bb1 ] ; <i32> [#uses=2]
%3 = add i32 %pass.011, 1 ; <i32> [#uses=2]
%exitcond18 = icmp eq i32 %3, %passes ; <i1> [#uses=1]

View File

@ -8,14 +8,14 @@
; CHECK: ********** MI Scheduling **********
; CHECK: foo:BB#0 entry
; GENERIC: SDIV
; GENERIC: LDRi12
; GENERIC: Latency : 1
; GENERIC: EORrr
; GENERIC: Latency : 1
; GENERIC: LDRi12
; GENERIC: Latency : 4
; GENERIC: ADDrr
; GENERIC: Latency : 1
; GENERIC: SDIV
; GENERIC: Latency : 0
; GENERIC: SUBrr
; GENERIC: Latency : 1

View File

@ -12,10 +12,10 @@
; GENERIC: Latency : 1
; R52_SCHED: Latency : 3
; CHECK: MLA
; GENERIC: Latency : 1
; GENERIC: Latency : 2
; R52_SCHED: Latency : 4
; CHECK: SDIV
; GENERIC: Latency : 1
; GENERIC: Latency : 0
; R52_SCHED: Latency : 8
; CHECK: ** Final schedule for BB#0 ***
; GENERIC: EORrr

View File

@ -40,8 +40,8 @@ define i64 @test_i64(i64 %a) {
; CHECK-LABEL: test_i64:
; CHECK: rbit
; CHECK: rbit
; CHECK: cmp
; CHECK: clz
; CHECK: cmp
; CHECK: add
; CHECK: clzne
%tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)
@ -81,8 +81,8 @@ define i64 @test_i64_zero_undef(i64 %a) {
; CHECK-LABEL: test_i64_zero_undef:
; CHECK: rbit
; CHECK: rbit
; CHECK: cmp
; CHECK: clz
; CHECK: cmp
; CHECK: add
; CHECK: clzne
%tmp = call i64 @llvm.cttz.i64(i64 %a, i1 true)

View File

@ -168,17 +168,17 @@ define void @test_v4i32(<4 x i32>* %p) {
define void @test_v1i64(<1 x i64>* %p) {
; CHECK-LABEL: test_v1i64:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x0
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i64 [[D3:d[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[D2]], [[D2]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D2]]
; CHECK: vadd.i64 [[D1]], [[D1]], [[D3]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vpaddl.u8 [[D1]], [[D1]]
; CHECK: vpaddl.u16 [[D1]], [[D1]]
; CHECK: vpaddl.u32 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
; CHECK: vand [[D2]], [[D1]], [[D2]]
; CHECK: vadd.i64 [[D2]], [[D2]], [[D3]]
; CHECK: vcnt.8 [[D2]], [[D2]]
; CHECK: vpaddl.u8 [[D2]], [[D2]]
; CHECK: vpaddl.u16 [[D2]], [[D2]]
; CHECK: vpaddl.u32 [[D2]], [[D2]]
; CHECK: vstr [[D2]], [r0]
%a = load <1 x i64>, <1 x i64>* %p
%tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 false)
store <1 x i64> %tmp, <1 x i64>* %p
@ -187,17 +187,17 @@ define void @test_v1i64(<1 x i64>* %p) {
define void @test_v2i64(<2 x i64>* %p) {
; CHECK-LABEL: test_v2i64:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x0
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i64 [[Q3:q[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[Q2]], [[Q2]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q2]]
; CHECK: vadd.i64 [[Q1]], [[Q1]], [[Q3]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u16 [[Q1]], [[Q1]]
; CHECK: vpaddl.u32 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
; CHECK: vand [[Q2]], [[Q1]], [[Q2]]
; CHECK: vadd.i64 [[Q2]], [[Q2]], [[Q3]]
; CHECK: vcnt.8 [[Q2]], [[Q2]]
; CHECK: vpaddl.u8 [[Q2]], [[Q2]]
; CHECK: vpaddl.u16 [[Q2]], [[Q2]]
; CHECK: vpaddl.u32 [[Q2]], [[Q2]]
; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
%a = load <2 x i64>, <2 x i64>* %p
%tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
store <2 x i64> %tmp, <2 x i64>* %p
@ -346,17 +346,17 @@ define void @test_v4i32_zero_undef(<4 x i32>* %p) {
define void @test_v1i64_zero_undef(<1 x i64>* %p) {
; CHECK-LABEL: test_v1i64_zero_undef:
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i32 [[D2:d[0-9]+]], #0x0
; CHECK: vldr [[D1:d[0-9]+]], [r0]
; CHECK: vmov.i64 [[D3:d[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[D2]], [[D2]], [[D1]]
; CHECK: vand [[D1]], [[D1]], [[D2]]
; CHECK: vadd.i64 [[D1]], [[D1]], [[D3]]
; CHECK: vcnt.8 [[D1]], [[D1]]
; CHECK: vpaddl.u8 [[D1]], [[D1]]
; CHECK: vpaddl.u16 [[D1]], [[D1]]
; CHECK: vpaddl.u32 [[D1]], [[D1]]
; CHECK: vstr [[D1]], [r0]
; CHECK: vand [[D2]], [[D1]], [[D2]]
; CHECK: vadd.i64 [[D2]], [[D2]], [[D3]]
; CHECK: vcnt.8 [[D2]], [[D2]]
; CHECK: vpaddl.u8 [[D2]], [[D2]]
; CHECK: vpaddl.u16 [[D2]], [[D2]]
; CHECK: vpaddl.u32 [[D2]], [[D2]]
; CHECK: vstr [[D2]], [r0]
%a = load <1 x i64>, <1 x i64>* %p
%tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 true)
store <1 x i64> %tmp, <1 x i64>* %p
@ -365,17 +365,17 @@ define void @test_v1i64_zero_undef(<1 x i64>* %p) {
define void @test_v2i64_zero_undef(<2 x i64>* %p) {
; CHECK-LABEL: test_v2i64_zero_undef:
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i32 [[Q2:q[0-9]+]], #0x0
; CHECK: vld1.64 {[[D1:d[0-9]+]], [[D2:d[0-9]+]]}, [r0]
; CHECK: vmov.i64 [[Q3:q[0-9]+]], #0xffffffffffffffff
; CHECK: vsub.i64 [[Q2]], [[Q2]], [[Q1:q[0-9]+]]
; CHECK: vand [[Q1]], [[Q1]], [[Q2]]
; CHECK: vadd.i64 [[Q1]], [[Q1]], [[Q3]]
; CHECK: vcnt.8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u8 [[Q1]], [[Q1]]
; CHECK: vpaddl.u16 [[Q1]], [[Q1]]
; CHECK: vpaddl.u32 [[Q1]], [[Q1]]
; CHECK: vst1.64 {[[D1]], [[D2]]}, [r0]
; CHECK: vand [[Q2]], [[Q1]], [[Q2]]
; CHECK: vadd.i64 [[Q2]], [[Q2]], [[Q3]]
; CHECK: vcnt.8 [[Q2]], [[Q2]]
; CHECK: vpaddl.u8 [[Q2]], [[Q2]]
; CHECK: vpaddl.u16 [[Q2]], [[Q2]]
; CHECK: vpaddl.u32 [[Q2]], [[Q2]]
; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
%a = load <2 x i64>, <2 x i64>* %p
%tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
store <2 x i64> %tmp, <2 x i64>* %p

View File

@ -26,7 +26,7 @@ declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
; THUMB-LABEL: _ZTW2sg
; THUMB: push {{.*}}lr
; THUMB: blx
; THUMB: bne [[TH_end:.?LBB0_[0-9]+]]
; THUMB: bne{{(.w)?}} [[TH_end:.?LBB0_[0-9]+]]
; THUMB: blx
; THUMB: tlv_atexit
; THUMB: [[TH_end]]:

View File

@ -687,8 +687,8 @@ define void @test_maxnan(half* %p) #0 {
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-LIBCALL: bl __aeabi_h2f
; CHECK-VFP-LIBCALL: vbsl
; CHECK-NOVFP: bic
; CHECK-NOVFP: and
; CHECK-NOVFP: bic
; CHECK-NOVFP: orr
; CHECK-LIBCALL: bl __aeabi_f2h
define void @test_copysign(half* %p, half* %q) #0 {
@ -818,25 +818,24 @@ define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
; CHECK-ALL-LABEL: test_insertelement:
; CHECK-ALL: sub sp, sp, #8
; CHECK-ALL: ldrh
; CHECK-ALL: strh
; CHECK-ALL: ldrh
; CHECK-ALL: strh
; CHECK-ALL: ldrh
; CHECK-ALL: strh
; CHECK-ALL: ldrh
; CHECK-ALL: strh
; CHECK-ALL: mov
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: mov
; CHECK-ALL-DAG: ldrh
; CHECK-ALL-DAG: orr
; CHECK-ALL: strh
; CHECK-ALL: ldrh
; CHECK-ALL: strh
; CHECK-ALL: ldrh
; CHECK-ALL: strh
; CHECK-ALL: ldrh
; CHECK-ALL: strh
; CHECK-ALL: ldrh
; CHECK-ALL: strh
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: ldrh
; CHECK-ALL-DAG: ldrh
; CHECK-ALL-DAG: ldrh
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: strh
; CHECK-ALL-DAG: strh
; CHECK-ALL: add sp, sp, #8
define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
%a = load half, half* %p, align 2
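
Many of the updates in this commit, like the block above, relax strict CHECK/CHECK-NEXT sequences into CHECK-DAG: the in-order model interleaves independent loads and stores differently, and CHECK-DAG lets FileCheck match a group of lines in any order between the surrounding plain CHECK anchors. A minimal sketch of the semantics (hypothetical instructions, not taken from this test):

; Either order of the strh/ldrh pair satisfies the two CHECK-DAG lines, as
; long as both instructions appear between the two plain CHECK anchors.
; CHECK: sub sp, sp, #8
; CHECK-DAG: strh
; CHECK-DAG: ldrh
; CHECK: add sp, sp, #8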

View File

@ -11,8 +11,8 @@ target triple = "armv7a--none-eabi"
; CHECK: vadd.f32 [[SREG5:s[0-9]+]], [[SREG4]], [[SREG1]]
; CHECK-NEXT: vcvtb.f16.f32 [[SREG6:s[0-9]+]], [[SREG5]]
; CHECK-NEXT: vmov [[RREG1:r[0-9]+]], [[SREG6]]
; CHECK-NEXT: uxth [[RREG2:r[0-9]+]], [[RREG1]]
; CHECK-NEXT: pkhbt [[RREG3:r[0-9]+]], [[RREG1]], [[RREG1]], lsl #16
; CHECK-DAG: uxth [[RREG2:r[0-9]+]], [[RREG1]]
; CHECK-DAG: pkhbt [[RREG3:r[0-9]+]], [[RREG1]], [[RREG1]], lsl #16
; CHECK-DAG: strh [[RREG1]], [r0, #4]
; CHECK-DAG: vmov [[DREG:d[0-9]+]], [[RREG3]], [[RREG2]]
; CHECK-DAG: vst1.32 {[[DREG]][0]}, [r0:32]

View File

@ -5,8 +5,6 @@
define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
; CHECK: cmpeq
; CHECK: moveq
; CHECK: popeq
entry:
br label %tailrecurse

View File

@ -55,8 +55,8 @@ define void @i24_and_or(i24* %a) {
define void @i24_insert_bit(i24* %a, i1 zeroext %bit) {
; LE-LABEL: i24_insert_bit:
; LE: @ BB#0:
; LE-NEXT: ldrh r2, [r0]
; LE-NEXT: mov r3, #255
; LE-NEXT: ldrh r2, [r0]
; LE-NEXT: orr r3, r3, #57088
; LE-NEXT: and r2, r2, r3
; LE-NEXT: orr r1, r2, r1, lsl #13
@ -99,8 +99,8 @@ define void @i56_or(i56* %a) {
; BE-NEXT: orr r2, r3, r2, lsl #8
; BE-NEXT: orr r2, r2, r12, lsl #24
; BE-NEXT: orr r2, r2, #384
; BE-NEXT: lsr r3, r2, #8
; BE-NEXT: strb r2, [r1, #2]
; BE-NEXT: lsr r3, r2, #8
; BE-NEXT: strh r3, [r1]
; BE-NEXT: bic r1, r12, #255
; BE-NEXT: orr r1, r1, r2, lsr #24
@ -127,8 +127,8 @@ define void @i56_and_or(i56* %a) {
; BE-NEXT: mov r3, #128
; BE-NEXT: ldrh r2, [r1, #4]!
; BE-NEXT: strb r3, [r1, #2]
; BE-NEXT: lsl r2, r2, #8
; BE-NEXT: ldr r12, [r0]
; BE-NEXT: lsl r2, r2, #8
; BE-NEXT: orr r2, r2, r12, lsl #24
; BE-NEXT: orr r2, r2, #384
; BE-NEXT: lsr r3, r2, #8

View File

@ -56,9 +56,11 @@ L2: ; preds = %L3, %bb2
L1: ; preds = %L2, %bb2
%res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1]
; ARM-LABEL: %L1
; ARM: ldr [[R_NEXTADDR:r[0-9]+]], LCPI
; ARM: ldr [[R1:r[0-9]+]], LCPI
; ARM: add [[R_NEXTADDR_b:r[0-9]+]], pc, [[R_NEXTADDR]]
; ARM: add [[R1b:r[0-9]+]], pc, [[R1]]
; ARM: str [[R1b]]
; ARM: str [[R1b]], {{\[}}[[R_NEXTADDR_b]]]
; THUMB-LABEL: %L1
; THUMB: ldr [[R2:r[0-9]+]], LCPI

View File

@ -13,7 +13,7 @@ define %BigInt @test_moved_jumptable(i1 %tst, i32 %sw, %BigInt %l) {
; CHECK: .long LBB{{[0-9]+_[0-9]+}}-[[JUMP_TABLE]]
; CHECK: [[SKIP_TABLE]]:
; CHECK: add pc, {{r[0-9]+}}, {{r[0-9]+}}
; CHECK: add pc, {{r[0-9]+|lr}}, {{r[0-9]+|lr}}
br i1 %tst, label %simple, label %complex
simple:

View File

@ -10,7 +10,7 @@ define i32 @test_tbh(i1 %tst, i32 %sw, i32 %l) {
; T2-LABEL: test_tbh:
; T2: [[ANCHOR:.LCPI[0-9_]+]]:
; T2: tbh [pc, r{{[0-9]+}}, lsl #1]
; T2-NEXT: @ BB#1
; T2-NEXT: @ BB#{{[0-9]+}}
; T2-NEXT: LJTI
; T2-NEXT: .short (.LBB0_[[x:[0-9]+]]-([[ANCHOR]]+4))/2
; T2-NEXT: .short (.LBB0_{{[0-9]+}}-([[ANCHOR]]+4))/2

View File

@ -17,22 +17,24 @@ entry:
%add6 = add nsw i256 %or, %d
store i256 %add6, i256* %b, align 8
ret void
; CHECK-DAG: ldm r3
; CHECK-DAG: ldm r2
; CHECK-DAG: ldr {{.*}}, [r3, #20]
; CHECK-DAG: ldr {{.*}}, [r3]
; CHECK-DAG: ldr {{.*}}, [r3, #4]
; CHECK-DAG: ldr {{.*}}, [r3, #8]
; CHECK-DAG: ldr {{.*}}, [r3, #12]
; CHECK-DAG: ldr {{.*}}, [r3, #16]
; CHECK-DAG: ldr {{.*}}, [r3, #28]
; CHECK-DAG: ldr {{.*}}, [r3, #20]
; CHECK-DAG: ldr {{.*}}, [r3, #24]
; CHECK-DAG: ldr {{.*}}, [r3, #28]
; CHECK-DAG: ldr {{.*}}, [r2, #20]
; CHECK-DAG: ldr {{.*}}, [r2, #16]
; CHECK-DAG: ldr {{.*}}, [r2, #28]
; CHECK-DAG: ldr {{.*}}, [r2, #24]
; CHECK-DAG: stmib r0
; CHECK-DAG: str {{.*}}, [r0]
; CHECK-DAG: ldr {{.*}}, [r2, #28]
; CHECK-DAG: stm r0
; CHECK-DAG: str {{.*}}, [r0, #20]
; CHECK-DAG: str {{.*}}, [r0, #24]
; CHECK-DAG: str {{.*}}, [r0, #28]
; CHECK-DAG: str {{.*}}, [r1]
; CHECK-DAG: stmib r1
; CHECK-DAG: stm r1
; CHECK-DAG: str {{.*}}, [r1, #20]
; CHECK-DAG: str {{.*}}, [r1, #24]
; CHECK-DAG: str {{.*}}, [r1, #28]
}

View File

@ -10,7 +10,7 @@
; CHECK-NOT: str
; CHECK: ldr
; CHECK: str
; CHECK: bx
; CHECK: {{bx|pop.*pc}}
define i32 @get_set_complex({ float, float }* noalias nocapture %retptr,
{ i8*, i32 }** noalias nocapture readnone %excinfo,
i8* noalias nocapture readnone %env,

View File

@ -28,15 +28,15 @@ define i32 @f1(i64 %x, i64 %y) {
define i32 @f2(i64 %x, i64 %y) {
; CHECK-LABEL: f2:
; CHECK-LE: lsr{{.*}}r2
; CHECK-LE-NEXT: rsb r3, r2, #32
; CHECK-LE: rsb r3, r2, #32
; CHECK-LE-NEXT: lsr{{.*}}r2
; CHECK-LE-NEXT: sub r2, r2, #32
; CHECK-LE-NEXT: orr r0, r0, r1, lsl r3
; CHECK-LE-NEXT: cmp r2, #0
; CHECK-LE-NEXT: asrge r0, r1, r2
; CHECK-BE: lsr{{.*}}r3
; CHECK-BE-NEXT: rsb r2, r3, #32
; CHECK-BE: rsb r2, r3, #32
; CHECK-BE-NEXT: lsr{{.*}}r3
; CHECK-BE-NEXT: orr r1, r1, r0, lsl r2
; CHECK-BE-NEXT: sub r2, r3, #32
; CHECK-BE-NEXT: cmp r2, #0
@ -49,15 +49,15 @@ define i32 @f2(i64 %x, i64 %y) {
define i32 @f3(i64 %x, i64 %y) {
; CHECK-LABEL: f3:
; CHECK-LE: lsr{{.*}}r2
; CHECK-LE-NEXT: rsb r3, r2, #32
; CHECK-LE: rsb r3, r2, #32
; CHECK-LE-NEXT: lsr{{.*}}r2
; CHECK-LE-NEXT: sub r2, r2, #32
; CHECK-LE-NEXT: orr r0, r0, r1, lsl r3
; CHECK-LE-NEXT: cmp r2, #0
; CHECK-LE-NEXT: lsrge r0, r1, r2
; CHECK-BE: lsr{{.*}}r3
; CHECK-BE-NEXT: rsb r2, r3, #32
; CHECK-BE: rsb r2, r3, #32
; CHECK-BE-NEXT: lsr{{.*}}r3
; CHECK-BE-NEXT: orr r1, r1, r0, lsl r2
; CHECK-BE-NEXT: sub r2, r3, #32
; CHECK-BE-NEXT: cmp r2, #0

View File

@ -74,15 +74,16 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QA]]
; CHECK: aese.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QB]]
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QC]]
; CHECK: aese.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QD]]
; CHECK: aese.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QE]]
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QF]]
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aese.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesmc.8 {{q[0-9][0-9]?}}, [[QG]]
; CHECK: aese.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
@ -159,15 +160,16 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QA]]
; CHECK: aesd.8 [[QB:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QB]]
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QC:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QC]]
; CHECK: aesd.8 [[QD:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QD]]
; CHECK: aesd.8 [[QE:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QE]]
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QF:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QF]]
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}
; CHECK: aesd.8 [[QG:q[0-9][0-9]?]], {{q[0-9][0-9]?}}
; CHECK-NEXT: aesimc.8 {{q[0-9][0-9]?}}, [[QG]]
; CHECK: aesd.8 {{q[0-9][0-9]?}}, {{q[0-9][0-9]?}}

View File

@ -281,16 +281,16 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) {
; CHECK: @ BB#0:
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ands r12, r0, #1
; CHECK-NEXT: mov lr, #1
; CHECK-NEXT: ands r12, r0, #1
; CHECK-NEXT: mov r0, #23
; CHECK-NEXT: eor r3, r3, #1
; CHECK-NEXT: orr lr, lr, #65536
; CHECK-NEXT: mvnne r0, #3
; CHECK-NEXT: movne r12, #1
; CHECK-NEXT: and r4, r0, lr
; CHECK-NEXT: eor r2, r2, lr
; CHECK-NEXT: movne r12, #1
; CHECK-NEXT: subs r0, r4, #1
; CHECK-NEXT: eor r2, r2, lr
; CHECK-NEXT: eor r3, r3, #1
; CHECK-NEXT: sbc r1, r12, #0
; CHECK-NEXT: orrs r2, r2, r3
; CHECK-NEXT: movne r0, r4

View File

@ -29,8 +29,8 @@ define i64 @test_shl(i64 %val, i64 %amt) {
; Explanation for lshr is pretty much the reverse of shl.
define i64 @test_lshr(i64 %val, i64 %amt) {
; CHECK-LABEL: test_lshr:
; CHECK: lsr r0, r0, r2
; CHECK: rsb [[REVERSE_SHIFT:.*]], r2, #32
; CHECK: lsr r0, r0, r2
; CHECK: orr r0, r0, r1, lsl [[REVERSE_SHIFT]]
; CHECK: sub [[EXTRA_SHIFT:.*]], r2, #32
; CHECK: cmp [[EXTRA_SHIFT]], #0

View File

@ -420,10 +420,10 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
; CHECK-ARMV7-DAG: str r8, [s[[STK1:.*]]]
; CHECK-ARMV7-DAG: str r10, [s[[STK2:.*]]]
; Store arguments.
; CHECK-ARMV7: mov r6, r3
; CHECK-ARMV7: mov r4, r2
; CHECK-ARMV7: mov r11, r1
; CHECK-ARMV7: mov r5, r0
; CHECK-ARMV7-DAG: mov r6, r3
; CHECK-ARMV7-DAG: mov r4, r2
; CHECK-ARMV7-DAG: mov r11, r1
; CHECK-ARMV7-DAG: mov r5, r0
; Setup call.
; CHECK-ARMV7: mov r0, #1
; CHECK-ARMV7: mov r1, #2
@ -435,10 +435,10 @@ define swiftcc void @swifterror_reg_clobber(%swift_error** nocapture %err) {
; Restore original arguments.
; CHECK-ARMV7-DAG: ldr r10, [s[[STK2]]]
; CHECK-ARMV7-DAG: ldr r8, [s[[STK1]]]
; CHECK-ARMV7: mov r0, r5
; CHECK-ARMV7: mov r1, r11
; CHECK-ARMV7: mov r2, r4
; CHECK-ARMV7: mov r3, r6
; CHECK-ARMV7-DAG: mov r0, r5
; CHECK-ARMV7-DAG: mov r1, r11
; CHECK-ARMV7-DAG: mov r2, r4
; CHECK-ARMV7-DAG: mov r3, r6
; CHECK-ARMV7: bl _params_in_reg2
; CHECK-ARMV7: pop {r4, r5, r6, r7, r10, r11, pc}
define swiftcc void @params_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err) {
@ -469,25 +469,25 @@ declare swiftcc void @params_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_e
; CHECK-ARMV7: mov r8, #0
; CHECK-ARMV7: bl _params_in_reg2
; Restore original arguments.
; CHECK-ARMV7: ldr r3, [s[[STK2]]]
; CHECK-ARMV7: ldr r10, [s[[STK1]]]
; CHECK-ARMV7-DAG: ldr r3, [s[[STK2]]]
; CHECK-ARMV7-DAG: ldr r10, [s[[STK1]]]
; Store %error_ptr_ref.
; CHECK-ARMV7: str r8, [s[[STK3:.*]]]
; CHECK-ARMV7-DAG: str r8, [s[[STK3:.*]]]
; Restore original arguments.
; CHECK-ARMV7: mov r0, r5
; CHECK-ARMV7: mov r1, r11
; CHECK-ARMV7: mov r2, r4
; CHECK-ARMV7: mov r8, r6
; CHECK-ARMV7-DAG: mov r0, r5
; CHECK-ARMV7-DAG: mov r1, r11
; CHECK-ARMV7-DAG: mov r2, r4
; CHECK-ARMV7-DAG: mov r8, r6
; CHECK-ARMV7: bl _params_and_return_in_reg2
; Store swifterror return %err.
; CHECK-ARMV7: str r8, [s[[STK1]]]
; CHECK-ARMV7-DAG: str r8, [s[[STK1]]]
; Load swifterror value %error_ptr_ref.
; CHECK-ARMV7: ldr r8, [s[[STK3]]]
; CHECK-ARMV7-DAG: ldr r8, [s[[STK3]]]
; Save return values.
; CHECK-ARMV7: mov r4, r0
; CHECK-ARMV7: mov r5, r1
; CHECK-ARMV7: mov r6, r2
; CHECK-ARMV7: mov r11, r3
; CHECK-ARMV7-DAG: mov r4, r0
; CHECK-ARMV7-DAG: mov r5, r1
; CHECK-ARMV7-DAG: mov r6, r2
; CHECK-ARMV7-DAG: mov r11, r3
; Setup call.
; CHECK-ARMV7: mov r0, #1
; CHECK-ARMV7: mov r1, #2
@ -496,12 +496,12 @@ declare swiftcc void @params_in_reg2(i32, i32, i32, i32, i8* swiftself, %swift_e
; CHECK-ARMV7: mov r10, #0
; CHECK-ARMV7: bl _params_in_reg2
; Load swifterror %err;
; CHECK-ARMV7: ldr r8, [s[[STK1]]]
; CHECK-ARMV7-DAG: ldr r8, [s[[STK1]]]
; Restore return values for returning.
; CHECK-ARMV7: mov r0, r4
; CHECK-ARMV7: mov r1, r5
; CHECK-ARMV7: mov r2, r6
; CHECK-ARMV7: mov r3, r11
; CHECK-ARMV7-DAG: mov r0, r4
; CHECK-ARMV7-DAG: mov r1, r5
; CHECK-ARMV7-DAG: mov r2, r6
; CHECK-ARMV7-DAG: mov r3, r11
; CHECK-ARMV7: pop {r4, r5, r6, r7, r10, r11, pc}
define swiftcc { i32, i32, i32, i32} @params_and_return_in_reg(i32, i32, i32, i32, i8* swiftself, %swift_error** nocapture swifterror %err) {
%error_ptr_ref = alloca swifterror %swift_error*, align 8

View File

@ -162,8 +162,8 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
; rdar://7923010
define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vcgt_zext:
;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1
;CHECK: vcgt.f32 [[Q1:q[0-9]+]]
;CHECK-DAG: vmov.i32 [[Q0:q[0-9]+]], #0x1
;CHECK-DAG: vcgt.f32 [[Q1:q[0-9]+]]
;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]]
%tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = load <4 x float>, <4 x float>* %B

View File

@ -237,14 +237,14 @@ entry:
; illegal type to a legal type.
define <2 x i8> @test_truncate(<2 x i128> %in) {
; CHECK-LABEL: test_truncate:
; CHECK: mov [[BASE:r[0-9]+]], sp
; CHECK-NEXT: vld1.32 {[[REG1:d[0-9]+]][0]}, {{\[}}[[BASE]]:32]
; CHECK-NEXT: add [[BASE2:r[0-9]+]], [[BASE]], #4
; CHECK-NEXT: vld1.32 {[[REG1]][1]}, {{\[}}[[BASE2]]:32]
; REG2 should map onto the same Q register as REG1, i.e., REG2 = REG1 - 1, but
; we cannot express that.
; CHECK-NEXT: vmov.32 [[REG2:d[0-9]+]][0], r0
; CHECK: vmov.32 [[REG2:d[0-9]+]][0], r0
; CHECK-NEXT: mov [[BASE:r[0-9]+]], sp
; CHECK-NEXT: vld1.32 {[[REG1:d[0-9]+]][0]}, {{\[}}[[BASE]]:32]
; CHECK-NEXT: add [[BASE2:r[0-9]+]], [[BASE]], #4
; CHECK-NEXT: vmov.32 [[REG2]][1], r1
; CHECK-NEXT: vld1.32 {[[REG1]][1]}, {{\[}}[[BASE2]]:32]
; The Q register used here should match floor(REG1/2), but we cannot express that.
; CHECK-NEXT: vmovn.i64 [[RES:d[0-9]+]], q{{[0-9]+}}
; CHECK-NEXT: vmov r0, r1, [[RES]]

View File

@ -182,9 +182,9 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: test_interleaved:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vext.16 d16, d16, d17, #3
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vuzp.16 d16, d17
; CHECK-NEXT: vzip.16 d16, d18
; CHECK-NEXT: vmov r0, r1, d16
@ -217,16 +217,16 @@ define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind {
; CHECK-LABEL: test_multisource:
; CHECK: @ BB#0:
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: add r2, r0, #32
; CHECK-NEXT: add r0, r0, #48
; CHECK-NEXT: add r2, r0, #48
; CHECK-NEXT: add r0, r0, #32
; CHECK-NEXT: vld1.16 {d16, d17}, [r1:128]!
; CHECK-NEXT: vld1.64 {d20, d21}, [r2:128]
; CHECK-NEXT: vld1.64 {d18, d19}, [r0:128]
; CHECK-NEXT: vld1.64 {d22, d23}, [r1:128]
; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128]
; CHECK-NEXT: vorr d24, d20, d20
; CHECK-NEXT: vld1.64 {d18, d19}, [r2:128]
; CHECK-NEXT: vld1.64 {d22, d23}, [r1:128]
; CHECK-NEXT: vzip.16 d24, d18
; CHECK-NEXT: vext.16 d18, d20, d24, #2
; CHECK-NEXT: vtrn.16 q8, q11
; CHECK-NEXT: vext.16 d18, d20, d24, #2
; CHECK-NEXT: vext.16 d16, d18, d16, #2
; CHECK-NEXT: vext.16 d16, d16, d16, #2
; CHECK-NEXT: vmov r0, r1, d16
@ -259,24 +259,24 @@ define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind {
define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: test_illegal:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vmov.u16 r1, d16[0]
; CHECK-NEXT: vmov.u16 r0, d17[3]
; CHECK-NEXT: vorr d22, d16, d16
; CHECK-NEXT: vorr d23, d16, d16
; CHECK-NEXT: vmov.16 d20[0], r1
; CHECK-NEXT: vuzp.16 d22, d23
; CHECK-NEXT: vmov.u16 r1, d17[1]
; CHECK-NEXT: vmov.16 d20[1], r0
; CHECK-NEXT: vuzp.16 d22, d18
; CHECK-NEXT: vmov.16 d20[2], r1
; CHECK-NEXT: vmov.u16 r0, d19[1]
; CHECK-NEXT: vext.16 d21, d16, d18, #3
; CHECK-NEXT: vmov.16 d20[3], r0
; CHECK-NEXT: vmov r0, r1, d20
; CHECK-NEXT: vmov r2, r3, d21
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-NEXT: vorr d22, d16, d16
; CHECK-NEXT: vmov.u16 r0, d16[0]
; CHECK-NEXT: vorr d23, d16, d16
; CHECK-NEXT: vmov.u16 r2, d17[3]
; CHECK-NEXT: vmov.u16 r3, d17[1]
; CHECK-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-NEXT: vmov.u16 r1, d19[1]
; CHECK-NEXT: vuzp.16 d22, d23
; CHECK-NEXT: vuzp.16 d22, d18
; CHECK-NEXT: vmov.16 d20[0], r0
; CHECK-NEXT: vmov.16 d20[1], r2
; CHECK-NEXT: vmov.16 d20[2], r3
; CHECK-NEXT: vmov.16 d20[3], r1
; CHECK-NEXT: vext.16 d21, d16, d18, #3
; CHECK-NEXT: vmov r0, r1, d20
; CHECK-NEXT: vmov r2, r3, d21
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 13, i32 3, i32 2, i32 2, i32 9>
@ -289,10 +289,10 @@ define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>
; CHECK-LABEL: test_elem_mismatch:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: vmov.32 r2, d16[0]
; CHECK-NEXT: vmov.32 r0, d17[0]
; CHECK-NEXT: vmov.16 d16[0], r2
; CHECK-NEXT: vmov.16 d16[1], r0
; CHECK-NEXT: vmov.32 r0, d16[0]
; CHECK-NEXT: vmov.32 r2, d17[0]
; CHECK-NEXT: vmov.16 d16[0], r0
; CHECK-NEXT: vmov.16 d16[1], r2
; CHECK-NEXT: vstr d16, [r1]
; CHECK-NEXT: mov pc, lr
%tmp0 = load <2 x i64>, <2 x i64>* %src, align 16

View File

@ -40,8 +40,8 @@ define void @test_add(float* %P, double* %D) {
define void @test_ext_round(float* %P, double* %D) {
;CHECK-LABEL: test_ext_round:
%a = load float, float* %P ; <float> [#uses=1]
;CHECK: vcvt.f64.f32
;CHECK: vcvt.f32.f64
;CHECK-DAG: vcvt.f64.f32
;CHECK-DAG: vcvt.f32.f64
%b = fpext float %a to double ; <double> [#uses=1]
%A = load double, double* %D ; <double> [#uses=1]
%B = fptrunc double %A to float ; <float> [#uses=1]

View File

@ -78,7 +78,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind {
;Check for a post-increment updating load.
define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
;CHECK-LABEL: vld1Qi8_update:
;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}:64]!
;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+|lr}}:64]!
%A = load i8*, i8** %ptr
%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %A, i32 8)
%tmp2 = getelementptr i8, i8* %A, i32 16
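
This file and the NEON load/store tests that follow stop pinning the base register in their checks: under the new scheduling model the register allocator can place the pointer in a different register, lr included, so the patterns now accept any of them. The before/after pair from the hunk above, for reference:

; Old form, tied to the register the previous schedule happened to produce:
; CHECK: vld1.8 {d16, d17}, [r0:64]!
; Relaxed form, accepting any general-purpose register or lr as the base:
; CHECK: vld1.8 {d16, d17}, [{{r[0-9]+|lr}}:64]!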

View File

@ -14,7 +14,7 @@
define <8 x i8> @vld2i8(i8* %A) nounwind {
;CHECK-LABEL: vld2i8:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vld2.8 {d16, d17}, [r0:64]
;CHECK: vld2.8 {d16, d17}, [{{r[0-9]+|lr}}:64]
%tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8.p0i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
@ -25,7 +25,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {
define <4 x i16> @vld2i16(i16* %A) nounwind {
;CHECK-LABEL: vld2i16:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vld2.16 {d16, d17}, [r0:128]
;CHECK: vld2.16 {d16, d17}, [{{r[0-9]+|lr}}:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16.p0i8(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
@ -59,7 +59,7 @@ define <2 x float> @vld2f(float* %A) nounwind {
;Check for a post-increment updating load.
define <2 x float> @vld2f_update(float** %ptr) nounwind {
;CHECK-LABEL: vld2f_update:
;CHECK: vld2.32 {d16, d17}, [r1]!
;CHECK: vld2.32 {d16, d17}, [{{r[0-9]+|lr}}]!
%A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
%tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32.p0i8(i8* %tmp0, i32 1)
@ -74,7 +74,7 @@ define <2 x float> @vld2f_update(float** %ptr) nounwind {
define <1 x i64> @vld2i64(i64* %A) nounwind {
;CHECK-LABEL: vld2i64:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vld1.64 {d16, d17}, [r0:128]
;CHECK: vld1.64 {d16, d17}, [{{r[0-9]+|lr}}:128]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64.p0i8(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
@ -86,7 +86,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind {
define <16 x i8> @vld2Qi8(i8* %A) nounwind {
;CHECK-LABEL: vld2Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64]
;CHECK: vld2.8 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:64]
%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8.p0i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
@ -97,7 +97,7 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind {
;Check for a post-increment updating load with register increment.
define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld2Qi8_update:
;CHECK: vld2.8 {d16, d17, d18, d19}, [r2:128], r1
;CHECK: vld2.8 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:128], r1
%A = load i8*, i8** %ptr
%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8.p0i8(i8* %A, i32 16)
%tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
@ -111,7 +111,7 @@ define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
define <8 x i16> @vld2Qi16(i16* %A) nounwind {
;CHECK-LABEL: vld2Qi16:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128]
;CHECK: vld2.16 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16.p0i8(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
@ -123,7 +123,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind {
define <4 x i32> @vld2Qi32(i32* %A) nounwind {
;CHECK-LABEL: vld2Qi32:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256]
;CHECK: vld2.32 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32.p0i8(i8* %tmp0, i32 64)
%tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0

View File

@ -15,7 +15,7 @@
define <8 x i8> @vld3i8(i8* %A) nounwind {
;CHECK-LABEL: vld3i8:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld3.8 {d16, d17, d18}, [r0:64]
;CHECK: vld3.8 {d16, d17, d18}, [{{r[0-9]+|lr}}:64]
%tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A, i32 32)
%tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
@ -37,7 +37,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
;Check for a post-increment updating load with register increment.
define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld3i16_update:
;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}}
;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+|lr}}], {{r[0-9]+|lr}}
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16.p0i8(i8* %tmp0, i32 1)
@ -74,7 +74,7 @@ define <2 x float> @vld3f(float* %A) nounwind {
define <1 x i64> @vld3i64(i64* %A) nounwind {
;CHECK-LABEL: vld3i64:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld1.64 {d16, d17, d18}, [r0:64]
;CHECK: vld1.64 {d16, d17, d18}, [{{r[0-9]+|lr}}:64]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
@ -85,7 +85,7 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {
;CHECK-LABEL: vld3i64_update:
;CHECK: vld1.64 {d16, d17, d18}, [r1:64]!
;CHECK: vld1.64 {d16, d17, d18}, [{{r[0-9]+|lr}}:64]!
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %tmp0, i32 16)
%tmp5 = getelementptr i64, i64* %A, i32 3
@ -99,8 +99,8 @@ define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {
define <16 x i8> @vld3Qi8(i8* %A) nounwind {
;CHECK-LABEL: vld3Qi8:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld3.8 {d16, d18, d20}, [r0:64]!
;CHECK: vld3.8 {d17, d19, d21}, [r0:64]
;CHECK: vld3.8 {d16, d18, d20}, [{{r[0-9]+|lr}}:64]!
;CHECK: vld3.8 {d17, d19, d21}, [{{r[0-9]+|lr}}:64]
%tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8.p0i8(i8* %A, i32 32)
%tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
@ -135,8 +135,8 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {
;Check for a post-increment updating load.
define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
;CHECK-LABEL: vld3Qi32_update:
;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]!
;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]!
;CHECK: vld3.32 {d16, d18, d20}, {{\[}}[[R:r[0-9]+|lr]]]!
;CHECK: vld3.32 {d17, d19, d21}, {{\[}}[[R]]]!
%A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0i8(i8* %tmp0, i32 1)

View File

@ -14,7 +14,7 @@
define <8 x i8> @vld4i8(i8* %A) nounwind {
;CHECK-LABEL: vld4i8:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld4.8 {d16, d17, d18, d19}, [r0:64]
;CHECK: vld4.8 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:64]
%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 8)
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
@ -25,7 +25,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind {
;Check for a post-increment updating load with register increment.
define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld4i8_update:
;CHECK: vld4.8 {d16, d17, d18, d19}, [r2:128], r1
;CHECK: vld4.8 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:128], r1
%A = load i8*, i8** %ptr
%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 16)
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
@ -39,7 +39,7 @@ define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
define <4 x i16> @vld4i16(i16* %A) nounwind {
;CHECK-LABEL: vld4i16:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld4.16 {d16, d17, d18, d19}, [r0:128]
;CHECK: vld4.16 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:128]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16.p0i8(i8* %tmp0, i32 16)
%tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
@ -51,7 +51,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
define <2 x i32> @vld4i32(i32* %A) nounwind {
;CHECK-LABEL: vld4i32:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld4.32 {d16, d17, d18, d19}, [r0:256]
;CHECK: vld4.32 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256]
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8* %tmp0, i32 32)
%tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
@ -74,7 +74,7 @@ define <2 x float> @vld4f(float* %A) nounwind {
define <1 x i64> @vld4i64(i64* %A) nounwind {
;CHECK-LABEL: vld4i64:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld1.64 {d16, d17, d18, d19}, [r0:256]
;CHECK: vld1.64 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256]
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8* %tmp0, i32 64)
%tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
@ -85,7 +85,7 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {
;CHECK-LABEL: vld4i64_update:
;CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
;CHECK: vld1.64 {d16, d17, d18, d19}, [{{r[0-9]+|lr}}:256]!
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8* %tmp0, i32 64)
%tmp5 = getelementptr i64, i64* %A, i32 4
@ -99,8 +99,8 @@ define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {
define <16 x i8> @vld4Qi8(i8* %A) nounwind {
;CHECK-LABEL: vld4Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vld4.8 {d16, d18, d20, d22}, [r0:256]!
;CHECK: vld4.8 {d17, d19, d21, d23}, [r0:256]
;CHECK: vld4.8 {d16, d18, d20, d22}, [{{r[0-9]+|lr}}:256]!
;CHECK: vld4.8 {d17, d19, d21, d23}, [{{r[0-9]+|lr}}:256]
%tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8.p0i8(i8* %A, i32 64)
%tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
@ -111,8 +111,8 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind {
define <8 x i16> @vld4Qi16(i16* %A) nounwind {
;CHECK-LABEL: vld4Qi16:
;Check for no alignment specifier.
;CHECK: vld4.16 {d16, d18, d20, d22}, [r0]!
;CHECK: vld4.16 {d17, d19, d21, d23}, [r0]
;CHECK: vld4.16 {d16, d18, d20, d22}, [{{r[0-9]+|lr}}]!
;CHECK: vld4.16 {d17, d19, d21, d23}, [{{r[0-9]+|lr}}]
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16.p0i8(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
@ -124,8 +124,8 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind {
;Check for a post-increment updating load.
define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld4Qi16_update:
;CHECK: vld4.16 {d16, d18, d20, d22}, [r1:64]!
;CHECK: vld4.16 {d17, d19, d21, d23}, [r1:64]!
;CHECK: vld4.16 {d16, d18, d20, d22}, [{{r[0-9]+|lr}}:64]!
;CHECK: vld4.16 {d17, d19, d21, d23}, [{{r[0-9]+|lr}}:64]!
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16.p0i8(i8* %tmp0, i32 8)

View File

@ -3,7 +3,7 @@
define <8 x i8> @vld1dupi8(i8* %A) nounwind {
;CHECK-LABEL: vld1dupi8:
;Check the (default) alignment value.
;CHECK: vld1.8 {d16[]}, [r0]
;CHECK: vld1.8 {d16[]}, [{{r[0-9]+|lr}}]
%tmp1 = load i8, i8* %A, align 8
%tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0
%tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer
@ -13,7 +13,7 @@ define <8 x i8> @vld1dupi8(i8* %A) nounwind {
define <8 x i8> @vld1dupi8_preinc(i8** noalias nocapture %a, i32 %b) nounwind {
entry:
;CHECK-LABEL: vld1dupi8_preinc:
;CHECK: vld1.8 {d16[]}, [r1]
;CHECK: vld1.8 {d16[]}, [{{r[0-9]+|lr}}]
%0 = load i8*, i8** %a, align 4
%add.ptr = getelementptr inbounds i8, i8* %0, i32 %b
%1 = load i8, i8* %add.ptr, align 1
@ -26,7 +26,7 @@ entry:
define <8 x i8> @vld1dupi8_postinc_fixed(i8** noalias nocapture %a) nounwind {
entry:
;CHECK-LABEL: vld1dupi8_postinc_fixed:
;CHECK: vld1.8 {d16[]}, [r1]!
;CHECK: vld1.8 {d16[]}, [{{r[0-9]+|lr}}]!
%0 = load i8*, i8** %a, align 4
%1 = load i8, i8* %0, align 1
%2 = insertelement <8 x i8> undef, i8 %1, i32 0
@ -39,7 +39,7 @@ entry:
define <8 x i8> @vld1dupi8_postinc_register(i8** noalias nocapture %a, i32 %n) nounwind {
entry:
;CHECK-LABEL: vld1dupi8_postinc_register:
;CHECK: vld1.8 {d16[]}, [r2], r1
;CHECK: vld1.8 {d16[]}, [{{r[0-9]+|lr}}], r1
%0 = load i8*, i8** %a, align 4
%1 = load i8, i8* %0, align 1
%2 = insertelement <8 x i8> undef, i8 %1, i32 0
@ -52,7 +52,7 @@ entry:
define <16 x i8> @vld1dupqi8_preinc(i8** noalias nocapture %a, i32 %b) nounwind {
entry:
;CHECK-LABEL: vld1dupqi8_preinc:
;CHECK: vld1.8 {d16[], d17[]}, [r1]
;CHECK: vld1.8 {d16[], d17[]}, [{{r[0-9]+|lr}}]
%0 = load i8*, i8** %a, align 4
%add.ptr = getelementptr inbounds i8, i8* %0, i32 %b
%1 = load i8, i8* %add.ptr, align 1
@ -65,7 +65,7 @@ entry:
define <16 x i8> @vld1dupqi8_postinc_fixed(i8** noalias nocapture %a) nounwind {
entry:
;CHECK-LABEL: vld1dupqi8_postinc_fixed:
;CHECK: vld1.8 {d16[], d17[]}, [r1]!
;CHECK: vld1.8 {d16[], d17[]}, [{{r[0-9]+|lr}}]!
%0 = load i8*, i8** %a, align 4
%1 = load i8, i8* %0, align 1
%2 = insertelement <16 x i8> undef, i8 %1, i32 0
@ -78,7 +78,7 @@ entry:
define <16 x i8> @vld1dupqi8_postinc_register(i8** noalias nocapture %a, i32 %n) nounwind {
entry:
;CHECK-LABEL: vld1dupqi8_postinc_register:
;CHECK: vld1.8 {d16[], d17[]}, [r2], r1
;CHECK: vld1.8 {d16[], d17[]}, [{{r[0-9]+|lr}}], r1
%0 = load i8*, i8** %a, align 4
%1 = load i8, i8* %0, align 1
%2 = insertelement <16 x i8> undef, i8 %1, i32 0
@ -91,7 +91,7 @@ entry:
define <4 x i16> @vld1dupi16(i16* %A) nounwind {
;CHECK-LABEL: vld1dupi16:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vld1.16 {d16[]}, [r0:16]
;CHECK: vld1.16 {d16[]}, [{{r[0-9]+|lr}}:16]
%tmp1 = load i16, i16* %A, align 8
%tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
@ -100,7 +100,7 @@ define <4 x i16> @vld1dupi16(i16* %A) nounwind {
define <4 x i16> @vld1dupi16_misaligned(i16* %A) nounwind {
;CHECK-LABEL: vld1dupi16_misaligned:
;CHECK: vld1.16 {d16[]}, [r0]
;CHECK: vld1.16 {d16[]}, [{{r[0-9]+|lr}}]
%tmp1 = load i16, i16* %A, align 1
%tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
@ -110,7 +110,7 @@ define <4 x i16> @vld1dupi16_misaligned(i16* %A) nounwind {
; This sort of looks like a vld1dup, but there's an extension in the way.
define <4 x i16> @load_i16_dup_zext(i8* %A) nounwind {
;CHECK-LABEL: load_i16_dup_zext:
;CHECK: ldrb r0, [r0]
;CHECK: ldrb r0, [{{r[0-9]+|lr}}]
;CHECK-NEXT: vdup.16 d16, r0
%tmp1 = load i8, i8* %A, align 1
%tmp2 = zext i8 %tmp1 to i16
@ -122,7 +122,7 @@ define <4 x i16> @load_i16_dup_zext(i8* %A) nounwind {
; This sort of looks like a vld1dup, but there's an extension in the way.
define <4 x i16> @load_i16_dup_sext(i8* %A) nounwind {
;CHECK-LABEL: load_i16_dup_sext:
;CHECK: ldrsb r0, [r0]
;CHECK: ldrsb r0, [{{r[0-9]+|lr}}]
;CHECK-NEXT: vdup.16 d16, r0
%tmp1 = load i8, i8* %A, align 1
%tmp2 = sext i8 %tmp1 to i16
@ -134,7 +134,7 @@ define <4 x i16> @load_i16_dup_sext(i8* %A) nounwind {
; This sort of looks like a vld1dup, but there's an extension in the way.
define <8 x i16> @load_i16_dupq_zext(i8* %A) nounwind {
;CHECK-LABEL: load_i16_dupq_zext:
;CHECK: ldrb r0, [r0]
;CHECK: ldrb r0, [{{r[0-9]+|lr}}]
;CHECK-NEXT: vdup.16 q8, r0
%tmp1 = load i8, i8* %A, align 1
%tmp2 = zext i8 %tmp1 to i16
@ -146,7 +146,7 @@ define <8 x i16> @load_i16_dupq_zext(i8* %A) nounwind {
define <2 x i32> @vld1dupi32(i32* %A) nounwind {
;CHECK-LABEL: vld1dupi32:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld1.32 {d16[]}, [r0:32]
;CHECK: vld1.32 {d16[]}, [{{r[0-9]+|lr}}:32]
%tmp1 = load i32, i32* %A, align 8
%tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
@ -156,7 +156,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {
; This sort of looks like a vld1dup, but there's an extension in the way.
define <4 x i32> @load_i32_dup_zext(i8* %A) nounwind {
;CHECK-LABEL: load_i32_dup_zext:
;CHECK: ldrb r0, [r0]
;CHECK: ldrb r0, [{{r[0-9]+|lr}}]
;CHECK-NEXT: vdup.32 q8, r0
%tmp1 = load i8, i8* %A, align 1
%tmp2 = zext i8 %tmp1 to i32
@ -168,7 +168,7 @@ define <4 x i32> @load_i32_dup_zext(i8* %A) nounwind {
; This sort of looks like a vld1dup, but there's an extension in the way.
define <4 x i32> @load_i32_dup_sext(i8* %A) nounwind {
;CHECK-LABEL: load_i32_dup_sext:
;CHECK: ldrsb r0, [r0]
;CHECK: ldrsb r0, [{{r[0-9]+|lr}}]
;CHECK-NEXT: vdup.32 q8, r0
%tmp1 = load i8, i8* %A, align 1
%tmp2 = sext i8 %tmp1 to i32
@ -179,7 +179,7 @@ define <4 x i32> @load_i32_dup_sext(i8* %A) nounwind {
define <2 x float> @vld1dupf(float* %A) nounwind {
;CHECK-LABEL: vld1dupf:
;CHECK: vld1.32 {d16[]}, [r0:32]
;CHECK: vld1.32 {d16[]}, [{{r[0-9]+|lr}}:32]
%tmp0 = load float, float* %A
%tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
@ -189,7 +189,7 @@ define <2 x float> @vld1dupf(float* %A) nounwind {
define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
;CHECK-LABEL: vld1dupQi8:
;Check the (default) alignment value.
;CHECK: vld1.8 {d16[], d17[]}, [r0]
;CHECK: vld1.8 {d16[], d17[]}, [{{r[0-9]+|lr}}]
%tmp1 = load i8, i8* %A, align 8
%tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0
%tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
@ -198,7 +198,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
define <4 x float> @vld1dupQf(float* %A) nounwind {
;CHECK-LABEL: vld1dupQf:
;CHECK: vld1.32 {d16[], d17[]}, [r0:32]
;CHECK: vld1.32 {d16[], d17[]}, [{{r[0-9]+|lr}}:32]
%tmp0 = load float, float* %A
%tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
@ -212,7 +212,7 @@ define <4 x float> @vld1dupQf(float* %A) nounwind {
define <8 x i8> @vld2dupi8(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi8:
;Check the (default) alignment value.
;CHECK: vld2.8 {d16[], d17[]}, [r0]
;CHECK: vld2.8 {d16[], d17[]}, [{{r[0-9]+|lr}}]
%tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
%tmp1 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 0
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
@ -283,7 +283,7 @@ define <4 x i16> @vld2dupi16(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi16:
;Check that a power-of-two alignment smaller than the total size of the memory
;being loaded is ignored.
;CHECK: vld2.16 {d16[], d17[]}, [r0]
;CHECK: vld2.16 {d16[], d17[]}, [{{r[0-9]+|lr}}]
%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
@ -296,7 +296,7 @@ define <4 x i16> @vld2dupi16(i8* %A) nounwind {
;Check for a post-increment updating load.
define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld2dupi16_update:
;CHECK: vld2.16 {d16[], d17[]}, [r1]!
;CHECK: vld2.16 {d16[], d17[]}, [{{r[0-9]+|lr}}]!
%A = load i16*, i16** %ptr
%A2 = bitcast i16* %A to i8*
%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
@ -313,7 +313,7 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld2dupi16_odd_update:
;CHECK: mov [[INC:r[0-9]+]], #6
;CHECK: vld2.16 {d16[], d17[]}, [r1], [[INC]]
;CHECK: vld2.16 {d16[], d17[]}, [{{r[0-9]+|lr}}], [[INC]]
%A = load i16*, i16** %ptr
%A2 = bitcast i16* %A to i8*
%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
@ -330,7 +330,7 @@ define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind {
define <2 x i32> @vld2dupi32(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi32:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld2.32 {d16[], d17[]}, [r0:64]
;CHECK: vld2.32 {d16[], d17[]}, [{{r[0-9]+|lr}}:64]
%tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
%tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
@ -350,7 +350,7 @@ declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8*, <2 x
;Check for a post-increment updating load with register increment.
define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld3dupi8_update:
;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1
;CHECK: vld3.8 {d16[], d17[], d18[]}, [{{r[0-9]+|lr}}], r1
%A = load i8*, i8** %ptr
%tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 0
@ -369,7 +369,7 @@ define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
define <4 x i16> @vld3dupi16(i8* %A) nounwind {
;CHECK-LABEL: vld3dupi16:
;Check the (default) alignment value. VLD3 does not support alignment.
;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0]
;CHECK: vld3.16 {d16[], d17[], d18[]}, [{{r[0-9]+|lr}}]
%tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
@ -391,7 +391,7 @@ declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8*, <4 x
;Check for a post-increment updating load.
define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld4dupi16_update:
;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [{{r[0-9]+|lr}}]!
%A = load i16*, i16** %ptr
%A2 = bitcast i16* %A to i8*
%tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
@ -415,7 +415,7 @@ define <2 x i32> @vld4dupi32(i8* %A) nounwind {
;CHECK-LABEL: vld4dupi32:
;Check the alignment value. An 8-byte alignment is allowed here even though
;it is smaller than the total size of the memory being loaded.
;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0:64]
;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [{{r[0-9]+|lr}}:64]
%tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
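
The recurring edit in this file swaps a pinned base register such as [r0] or [r1] for the pattern [{{r[0-9]+|lr}}]: under the new scheduling model the address can end up in any general-purpose register, and r[0-9]+ alone would not match lr, hence the alternation. A reduced sketch of the idiom, mirroring the vld2dup pattern above (the function and value names are illustrative, not part of this commit):

%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }

define <8 x i8> @dup_sketch(i8* %A) nounwind {
; The base register is no longer pinned, so accept any of r0-r12 or lr:
; CHECK: vld2.8 {d{{[0-9]+}}[], d{{[0-9]+}}[]}, [{{r[0-9]+|lr}}]
  %t0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
  %a = extractvalue %struct.__neon_int8x8x2_t %t0, 0
  %adup = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> zeroinitializer
  %b = extractvalue %struct.__neon_int8x8x2_t %t0, 1
  %bdup = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer
  %sum = add <8 x i8> %adup, %bdup
  ret <8 x i8> %sum
}

declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly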

View File

@ -308,7 +308,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;Check for a post-increment updating load with register increment.
define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vld3laneQi16_update:
;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+|lr}}], {{r[0-9]+}}
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>, <8 x i16>* %B
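
Every _update test follows one shape: load the pointer from memory, feed it to the intrinsic, advance it with a getelementptr, and store it back; the backend folds the advance into a post-incremented addressing mode, using a register increment when the step is not a constant equal to the access size. A minimal sketch of that shape, using vld1 rather than vld3 for brevity (hypothetical test, not from this commit):

define <8 x i8> @vld1_update_sketch(i8** %ptr, i32 %inc) nounwind {
; The pointer advance below folds into the addressing mode as a
; register increment:
; CHECK: vld1.8 {d{{[0-9]+}}}, [{{r[0-9]+|lr}}], r{{[0-9]+}}
  %A = load i8*, i8** %ptr
  %v = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %A, i32 1)
  %next = getelementptr i8, i8* %A, i32 %inc
  store i8* %next, i8** %ptr
  ret <8 x i8> %v
}

declare <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8*, i32) nounwind readonly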

View File

@ -285,17 +285,17 @@ define void @addCombineToVPADDLq_s8(<16 x i8> *%cbcr, <8 x i16> *%X) nounwind ss
define void @addCombineToVPADDL_s8(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp {
; CHECK-LABEL: addCombineToVPADDL_s8:
; CHECK: @ BB#0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-NEXT: vmov.i16 d18, #0x8
; CHECK-NEXT: vneg.s16 d18, d18
; CHECK-NEXT: vext.8 d19, d16, d16, #1
; CHECK-NEXT: vshl.i16 d16, d16, #8
; CHECK-NEXT: vshl.i16 d17, d19, #8
; CHECK-NEXT: vshl.s16 d16, d16, d18
; CHECK-NEXT: vshl.s16 d17, d17, d18
; CHECK-NEXT: vadd.i16 d16, d17, d16
; CHECK-NEXT: vstr d16, [r1]
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: vmov.i16 d16, #0x8
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vext.8 d17, d18, d16, #1
; CHECK-NEXT: vneg.s16 d16, d16
; CHECK-NEXT: vshl.i16 d18, d18, #8
; CHECK-NEXT: vshl.i16 d17, d17, #8
; CHECK-NEXT: vshl.s16 d18, d18, d16
; CHECK-NEXT: vshl.s16 d16, d17, d16
; CHECK-NEXT: vadd.i16 d16, d16, d18
; CHECK-NEXT: vstr d16, [r1]
; CHECK-NEXT: mov pc, lr
%tmp = load <16 x i8>, <16 x i8>* %cbcr
%tmp1 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
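
Unlike the loosened patterns elsewhere, this block pins the whole sequence with CHECK-NEXT, so a pure schedule change forces regenerating the entire run of checks even though the instruction mix is identical; only the order and the d-register assignments move. Where strict ordering is not what a test is trying to prove, CHECK-DAG tolerates such reorderings (hypothetical alternative, not how this test is written):

; CHECK-DAG matches a group of lines in any relative order, so a
; scheduler that hoists the vmov.i16 above the vld1.64 still passes:
; CHECK-DAG: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
; CHECK-DAG: vmov.i16 d{{[0-9]+}}, #0x8
; CHECK: vadd.i16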

View File

@ -39,7 +39,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
;Check for a post-increment updating store.
define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst1f_update:
;CHECK: vst1.32 {d16}, [r1]!
;CHECK: vst1.32 {d16}, [r{{[0-9]+}}]!
%A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>, <2 x float>* %B
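
The writeback form checked here is the other post-increment flavour: when the pointer advance is a constant equal to the size of the access, the update folds into the exclamation-mark form rather than taking a register increment. A sketch of how that form arises (hypothetical test, not from this commit):

define void @vst1_update_sketch(i8** %ptr, <8 x i8>* %B) nounwind {
; The 8-byte advance matches the 8-byte store, so it becomes writeback:
; CHECK: vst1.8 {d{{[0-9]+}}}, [r{{[0-9]+}}]!
  %A = load i8*, i8** %ptr
  %v = load <8 x i8>, <8 x i8>* %B
  call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %A, <8 x i8> %v, i32 1)
  %next = getelementptr i8, i8* %A, i32 8
  store i8* %next, i8** %ptr
  ret void
}

declare void @llvm.arm.neon.vst1.p0i8.v8i8(i8*, <8 x i8>, i32) nounwind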

View File

@ -12,7 +12,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
;Check for a post-increment updating store with register increment.
define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vst4i8_update:
;CHECK: vst4.8 {d16, d17, d18, d19}, [r1:128], r2
;CHECK: vst4.8 {d16, d17, d18, d19}, [r{{[0-9]+}}:128], r2
%A = load i8*, i8** %ptr
%tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
@ -62,7 +62,7 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vst4i64_update:
;CHECK: vst1.64 {d16, d17, d18, d19}, [r1]!
;CHECK: vst1.64 {d16, d17, d18, d19}, [r{{[0-9]+}}]!
%A = load i64*, i64** %ptr
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>, <1 x i64>* %B
@ -116,8 +116,8 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
;Check for a post-increment updating store.
define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {
;CHECK-LABEL: vst4Qf_update:
;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]!
;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]!
;CHECK: vst4.32 {d16, d18, d20, d22}, [r[[REG:[0-9]+]]]!
;CHECK: vst4.32 {d17, d19, d21, d23}, [r[[REG]]]!
%A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>, <4 x float>* %B
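
vst4Qf_update needs something stricter than a plain register wildcard: the two vst4.32 halves must use the same base register even though it no longer has to be r1. The new checks express that with a FileCheck capture; [[REG:[0-9]+]] binds the register number at the first match and [[REG]] then requires the identical register at the second, leaving the choice itself to the allocator:

; CHECK: vst4.32 {d16, d18, d20, d22}, [r[[REG:[0-9]+]]]!
; CHECK: vst4.32 {d17, d19, d21, d23}, [r[[REG]]]!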

View File

@ -127,7 +127,7 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;Check for a post-increment updating store with register increment.
define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vst2lanei16_update:
;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2
;CHECK: vst2.16 {d16[1], d17[1]}, [r{{[0-9]+}}], r{{[0-9]+}}
%A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>, <4 x i16>* %B
@ -251,7 +251,7 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;Check for a post-increment updating store.
define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vst3laneQi32_update:
;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]!
;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r{{[0-9]+}}]!
%A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>, <4 x i32>* %B
@ -292,7 +292,7 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;Check for a post-increment updating store.
define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst4lanei8_update:
;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r{{[0-9]+}}:32]!
%A = load i8*, i8** %ptr
%tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)

View File

@ -324,26 +324,23 @@ define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8
; truncate from i32 to i16 and one vmovn.i16 to perform the final truncation for i8.
; CHECK-LABEL: cmpsel_trunc:
; CHECK: @ BB#0:
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: add r4, sp, #64
; CHECK-NEXT: add r5, sp, #32
; CHECK-NEXT: add r12, sp, #48
; CHECK-NEXT: add lr, sp, #16
; CHECK-NEXT: vld1.64 {d16, d17}, [r5]
; CHECK-NEXT: vld1.64 {d18, d19}, [r4]
; CHECK-NEXT: vld1.64 {d20, d21}, [lr]
; CHECK-NEXT: vld1.64 {d22, d23}, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vcgt.u32 q9, q11, q10
; CHECK-NEXT: vmovn.i32 d17, q8
; CHECK-NEXT: vmovn.i32 d16, q9
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vmov d19, r0, r1
; CHECK-NEXT: vmovn.i16 d16, q8
; CHECK-NEXT: vbsl d16, d19, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: add r12, sp, #16
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: add r12, sp, #48
; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
; CHECK-NEXT: add r12, sp, #32
; CHECK-NEXT: vcgt.u32 q8, q10, q8
; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
; CHECK-NEXT: vcgt.u32 q9, q10, q9
; CHECK-NEXT: vmov d20, r2, r3
; CHECK-NEXT: vmovn.i32 d17, q8
; CHECK-NEXT: vmovn.i32 d16, q9
; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vmovn.i16 d16, q8
; CHECK-NEXT: vbsl d16, d18, d20
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
%c = icmp ult <8 x i32> %cmp0, %cmp1
%res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1
@ -356,28 +353,28 @@ define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8
define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
; CHECK-LABEL: vuzp_trunc_and_shuffle:
; CHECK: @ BB#0:
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldr r12, [sp, #40]
; CHECK-NEXT: add lr, sp, #24
; CHECK-NEXT: add r4, sp, #8
; CHECK-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-NEXT: vld1.64 {d18, d19}, [lr]
; CHECK-NEXT: vld1.32 {d20[0]}, [r12:32]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vmov.i8 d17, #0x7
; CHECK-NEXT: vneg.s8 d17, d17
; CHECK-NEXT: vmovl.u8 q9, d20
; CHECK-NEXT: vuzp.8 d16, d18
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vmov d19, r0, r1
; CHECK-NEXT: vshl.s8 d16, d16, d17
; CHECK-NEXT: vbsl d16, d19, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: add r12, sp, #8
; CHECK-NEXT: add lr, sp, #24
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: ldr r12, [sp, #40]
; CHECK-NEXT: vld1.64 {d18, d19}, [lr]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vld1.32 {d18[0]}, [r12:32]
; CHECK-NEXT: vmov.i8 d19, #0x7
; CHECK-NEXT: vmovl.u8 q10, d18
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vneg.s8 d17, d19
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vuzp.8 d16, d20
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vshl.s8 d16, d16, d17
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vbsl d16, d17, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
%cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
@ -392,25 +389,22 @@ define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1,
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right:
; CHECK: @ BB#0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: add r12, sp, #24
; CHECK-NEXT: add lr, sp, #8
; CHECK-NEXT: vld1.64 {d16, d17}, [lr]
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vmov d19, r0, r1
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vmov.i8 d17, #0x7
; CHECK-NEXT: vuzp.8 d16, d18
; CHECK-NEXT: vneg.s8 d17, d17
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vshl.s8 d16, d16, d17
; CHECK-NEXT: vbsl d16, d19, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: add r12, sp, #16
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vmov.i8 d18, #0x7
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vuzp.8 d16, d17
; CHECK-NEXT: vneg.s8 d17, d18
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vshl.s8 d16, d16, d17
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vbsl d16, d17, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
%cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
@ -423,26 +417,23 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1
define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left:
; CHECK: @ BB#0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: add r12, sp, #24
; CHECK-NEXT: add lr, sp, #8
; CHECK-NEXT: vldr d20, .LCPI22_0
; CHECK-NEXT: vld1.64 {d16, d17}, [lr]
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vmov d19, r0, r1
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vmov.i8 d17, #0x7
; CHECK-NEXT: vtbl.8 d16, {d16}, d20
; CHECK-NEXT: vneg.s8 d17, d17
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vshl.s8 d16, d16, d17
; CHECK-NEXT: vbsl d16, d19, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: mov r12, sp
; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
; CHECK-NEXT: add r12, sp, #16
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vldr d18, .LCPI22_0
; CHECK-NEXT: vmov.i8 d19, #0x7
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vtbl.8 d16, {d16}, d18
; CHECK-NEXT: vneg.s8 d17, d19
; CHECK-NEXT: vmov d18, r2, r3
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vshl.s8 d16, d16, d17
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vbsl d16, d17, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ BB#1:
; CHECK-NEXT: .LCPI22_0:
@ -468,65 +459,63 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
; CHECK-LABEL: vuzp_wide_type:
; CHECK: @ BB#0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: .setfp r11, sp, #16
; CHECK-NEXT: add r11, sp, #16
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, sp, #8
; CHECK-NEXT: bic sp, sp, #15
; CHECK-NEXT: add r5, r11, #52
; CHECK-NEXT: add r7, r11, #32
; CHECK-NEXT: add r4, r11, #44
; CHECK-NEXT: add r6, r11, #24
; CHECK-NEXT: add r12, r11, #60
; CHECK-NEXT: add lr, r11, #40
; CHECK-NEXT: vld1.32 {d17[0]}, [r7:32]
; CHECK-NEXT: vld1.32 {d19[0]}, [r5:32]
; CHECK-NEXT: vld1.32 {d22[0]}, [r12:32]
; CHECK-NEXT: ldr r12, [r11, #64]
; CHECK-NEXT: vld1.32 {d20[0]}, [lr:32]
; CHECK-NEXT: add r7, r11, #48
; CHECK-NEXT: add r5, r11, #28
; CHECK-NEXT: vld1.32 {d16[0]}, [r6:32]
; CHECK-NEXT: vld1.32 {d18[0]}, [r4:32]
; CHECK-NEXT: add r6, r11, #56
; CHECK-NEXT: add r4, r11, #36
; CHECK-NEXT: vcgt.u32 q10, q11, q10
; CHECK-NEXT: vld1.32 {d19[1]}, [r6:32]
; CHECK-NEXT: vld1.32 {d17[1]}, [r4:32]
; CHECK-NEXT: add r6, r12, #4
; CHECK-NEXT: vld1.32 {d18[1]}, [r7:32]
; CHECK-NEXT: vld1.32 {d16[1]}, [r5:32]
; CHECK-NEXT: ldr r7, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vmovn.i32 d18, q10
; CHECK-NEXT: vmov.32 d21[0], r7
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vmov.u8 r7, d21[3]
; CHECK-NEXT: vmov.i8 d17, #0x7
; CHECK-NEXT: vuzp.8 d16, d18
; CHECK-NEXT: vmov.8 d23[0], r7
; CHECK-NEXT: vneg.s8 d17, d17
; CHECK-NEXT: add r7, r11, #8
; CHECK-NEXT: vldr d18, .LCPI23_0
; CHECK-NEXT: vld1.8 {d23[1]}, [r6]
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vshl.s8 d20, d16, d17
; CHECK-NEXT: vmov.i8 q8, #0x7
; CHECK-NEXT: vneg.s8 q8, q8
; CHECK-NEXT: vtbl.8 d22, {d20, d21}, d18
; CHECK-NEXT: vld1.64 {d18, d19}, [r7]
; CHECK-NEXT: vshl.i8 q10, q11, #7
; CHECK-NEXT: vmov d23, r2, r3
; CHECK-NEXT: vmov d22, r0, r1
; CHECK-NEXT: vshl.s8 q8, q10, q8
; CHECK-NEXT: vbsl q8, q11, q9
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: sub sp, r11, #16
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .save {r4, r10, r11, lr}
; CHECK-NEXT: push {r4, r10, r11, lr}
; CHECK-NEXT: .setfp r11, sp, #8
; CHECK-NEXT: add r11, sp, #8
; CHECK-NEXT: bic sp, sp, #15
; CHECK-NEXT: add r12, r11, #32
; CHECK-NEXT: add lr, r11, #60
; CHECK-NEXT: vld1.32 {d17[0]}, [r12:32]
; CHECK-NEXT: add r12, r11, #24
; CHECK-NEXT: vld1.32 {d22[0]}, [lr:32]
; CHECK-NEXT: add lr, r11, #36
; CHECK-NEXT: vld1.32 {d16[0]}, [r12:32]
; CHECK-NEXT: add r12, r11, #52
; CHECK-NEXT: vld1.32 {d19[0]}, [r12:32]
; CHECK-NEXT: add r12, r11, #44
; CHECK-NEXT: vld1.32 {d17[1]}, [lr:32]
; CHECK-NEXT: vld1.32 {d18[0]}, [r12:32]
; CHECK-NEXT: add r12, r11, #40
; CHECK-NEXT: vld1.32 {d20[0]}, [r12:32]
; CHECK-NEXT: ldr r12, [r11, #64]
; CHECK-NEXT: vcgt.u32 q10, q11, q10
; CHECK-NEXT: ldr r4, [r12]
; CHECK-NEXT: vmov.32 d25[0], r4
; CHECK-NEXT: add r4, r11, #28
; CHECK-NEXT: vld1.32 {d16[1]}, [r4:32]
; CHECK-NEXT: add r4, r11, #56
; CHECK-NEXT: vld1.32 {d19[1]}, [r4:32]
; CHECK-NEXT: add r4, r11, #48
; CHECK-NEXT: vmov.u8 lr, d25[3]
; CHECK-NEXT: vld1.32 {d18[1]}, [r4:32]
; CHECK-NEXT: add r4, r12, #4
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vmovn.i32 d19, q10
; CHECK-NEXT: vldr d20, .LCPI23_0
; CHECK-NEXT: vmov.i8 d18, #0x7
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vneg.s8 d17, d18
; CHECK-NEXT: vuzp.8 d16, d19
; CHECK-NEXT: vmov.i8 q9, #0x7
; CHECK-NEXT: vshl.i8 d16, d16, #7
; CHECK-NEXT: vneg.s8 q9, q9
; CHECK-NEXT: vshl.s8 d24, d16, d17
; CHECK-NEXT: vmov.8 d17[0], lr
; CHECK-NEXT: vtbl.8 d16, {d24, d25}, d20
; CHECK-NEXT: vld1.8 {d17[1]}, [r4]
; CHECK-NEXT: add r4, r11, #8
; CHECK-NEXT: vshl.i8 q8, q8, #7
; CHECK-NEXT: vld1.64 {d20, d21}, [r4]
; CHECK-NEXT: vshl.s8 q8, q8, q9
; CHECK-NEXT: vmov d19, r2, r3
; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vbsl q8, q9, q10
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: sub sp, r11, #8
; CHECK-NEXT: pop {r4, r10, r11, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ BB#1:
; CHECK-NEXT: .LCPI23_0:
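
The regenerated blocks in this file also capture knock-on register-allocation changes: the stack addresses appear to be materialised one at a time under the in-order schedule, so vuzp_wide_type now saves four callee-saved registers instead of six and no longer needs its 8-byte pad. Tests that only need the frame shape, not the exact callee-saved set, can check the prologue loosely (hypothetical sketch, not how these autogenerated checks are written):

; A {{.*}} inside the literal braces leaves the callee-saved list open
; while still anchoring the frame pointer setup:
; CHECK: push {{{.*}}r11, lr}
; CHECK: .setfp r11, sp, #{{[0-9]+}}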

View File

@ -1,25 +1,25 @@
; RUN: llc < %s -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s
; Check that new water is created by splitting the basic block right after the
; Check that new water is created by splitting the basic block after the
; load instruction. Previously, new water was created before the load
; instruction, which caused the pass to fail to converge.
define void @test(i1 %tst) {
; CHECK-LABEL: test:
; CHECK: vldr {{s[0-9]+}}, [[CONST:\.LCPI[0-9]+_[0-9]+]]
; CHECK-NEXT: b.w [[CONTINUE:\.LBB[0-9]+_[0-9]+]]
; CHECK: b.w [[CONTINUE:\.LBB[0-9]+_[0-9]+]]
; CHECK: [[CONST]]:
; CHECK-NEXT: .long
; CHECK: [[CONTINUE]]:
entry:
call i32 @llvm.arm.space(i32 2000, i32 undef)
br i1 %tst, label %true, label %false
true:
%val = phi float [12345.0, %entry], [undef, %false]
call i32 @llvm.arm.space(i32 2000, i32 undef)
call void @bar(float %val)
ret void
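
Dropping -NEXT on the branch check is the minimal relaxation here: plain CHECK only requires order, so instructions the new model schedules between the vldr and the b.w no longer fail the test, while the checks that matter still enforce that the constant pool entry lands after the unconditional branch. In reduced form (hypothetical, the directives below are an illustration):

; CHECK-NEXT would demand strict adjacency; plain CHECK allows other
; scheduled instructions between the two matches:
; CHECK: vldr {{s[0-9]+}}, [[CONST:\.LCPI[0-9]+_[0-9]+]]
; CHECK: b.w [[CONTINUE:\.LBB[0-9]+_[0-9]+]]
; CHECK: [[CONST]]:
; CHECK-NEXT: .long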

View File

@ -35,9 +35,6 @@ entry:
; CHECK: cmp
; CHECK: it eq
; CHECK: cmpeq
; CHECK: itt eq
; CHECK: moveq
; CHECK: popeq
br label %tailrecurse
tailrecurse: ; preds = %bb, %entry