mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
151143cebb
The predicated MVE intrinsics are generated as, for example, llvm.arm.mve.add.predicated(x, splat(y), p). We need to sink the splat value back into the loop, like we do for other instructions, so we can re-select qr variants. Differential Revision: https://reviews.llvm.org/D87693
694 lines
32 KiB
LLVM
694 lines
32 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s

; vadd: predicated i32 add of a splatted scalar.
; The splat of %c0 is built in the preheader (while.body.lr.ph) and consumed by
; llvm.arm.mve.add.predicated inside the loop. The expected output uses the
; vector-by-scalar form "vadd.i32 q0, q0, r1" inside a dlstp/letp loop.
define void @vadd(i32* %s1, i32 %c0, i32 %N) {
; CHECK-LABEL: vadd:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB0_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB0_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vadd.i32 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB0_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
  %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, <4 x i1> %0, <4 x i32> %2)
  tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vsub: predicated i32 subtract of a splatted scalar.
; The splat of %c0 lives in the preheader and is used by
; llvm.arm.mve.sub.predicated in the loop. The expected output is
; "vsub.i32 q0, q0, r1" inside a dlstp/letp loop.
define void @vsub(i32* %s1, i32 %c0, i32 %N) {
; CHECK-LABEL: vsub:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB1_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB1_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vsub.i32 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB1_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
  %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.arm.mve.sub.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, <4 x i1> %0, <4 x i32> %2)
  tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vmul: predicated i32 multiply by a splatted scalar.
; The splat of %c0 lives in the preheader and is used by
; llvm.arm.mve.mul.predicated in the loop. The expected output is
; "vmul.i32 q0, q0, r1" inside a dlstp/letp loop.
define void @vmul(i32* %s1, i32 %c0, i32 %N) {
; CHECK-LABEL: vmul:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB2_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB2_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmul.i32 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB2_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
  %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, <4 x i1> %0, <4 x i32> %2)
  tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vqadd: predicated saturating i32 add of a splatted scalar.
; The splat of %c0 lives in the preheader and is used by
; llvm.arm.mve.qadd.predicated in the loop. The expected output is
; "vqadd.s32 q0, q0, r1" inside a dlstp/letp loop.
; NOTE(review): the i32 0 operand presumably selects the signed variant
; (the expected mnemonic is .s32) - confirm against the intrinsic definition.
define void @vqadd(i32* %s1, i32 %c0, i32 %N) {
; CHECK-LABEL: vqadd:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB3_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB3_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vqadd.s32 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB3_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
  %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.arm.mve.qadd.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, i32 0, <4 x i1> %0, <4 x i32> %2)
  tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vqsub: predicated saturating i32 subtract of a splatted scalar.
; The splat of %c0 lives in the preheader and is used by
; llvm.arm.mve.qsub.predicated in the loop. The expected output is
; "vqsub.s32 q0, q0, r1" inside a dlstp/letp loop.
; NOTE(review): the i32 0 operand presumably selects the signed variant
; (the expected mnemonic is .s32) - confirm against the intrinsic definition.
define void @vqsub(i32* %s1, i32 %c0, i32 %N) {
; CHECK-LABEL: vqsub:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB4_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB4_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vqsub.s32 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB4_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
  %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.arm.mve.qsub.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, i32 0, <4 x i1> %0, <4 x i32> %2)
  tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vhadd: predicated halving i32 add of a splatted scalar.
; The splat of %c0 lives in the preheader and is used by
; llvm.arm.mve.hadd.predicated in the loop. The expected output is
; "vhadd.s32 q0, q0, r1" inside a dlstp/letp loop.
; NOTE(review): the i32 0 operand presumably selects the signed variant
; (the expected mnemonic is .s32) - confirm against the intrinsic definition.
define void @vhadd(i32* %s1, i32 %c0, i32 %N) {
; CHECK-LABEL: vhadd:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB5_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB5_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vhadd.s32 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB5_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
  %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, i32 0, <4 x i1> %0, <4 x i32> %2)
  tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vhsub: predicated halving i32 subtract of a splatted scalar.
; The splat of %c0 lives in the preheader and is used by
; llvm.arm.mve.hsub.predicated in the loop. The expected output is
; "vhsub.s32 q0, q0, r1" inside a dlstp/letp loop.
; NOTE(review): the i32 0 operand presumably selects the signed variant
; (the expected mnemonic is .s32) - confirm against the intrinsic definition.
define void @vhsub(i32* %s1, i32 %c0, i32 %N) {
; CHECK-LABEL: vhsub:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB6_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB6_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vhsub.s32 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB6_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
  %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.arm.mve.hsub.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, i32 0, <4 x i1> %0, <4 x i32> %2)
  tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vqdmull: predicated saturating doubling multiply-long by a splatted scalar.
; %c0 is truncated to i16 and splatted to <8 x i16> in the preheader. The loop
; loads <4 x i16>, sign-extends to <4 x i32>, and calls
; llvm.arm.mve.vqdmull.predicated. The expected output is
; "vldrh.s32" followed by "vqdmullb.s16 q0, q0, r1" inside a dlstp/letp loop.
define void @vqdmull(i32* %s1, i32 %c0, i32 %N) {
; CHECK-LABEL: vqdmull:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB7_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB7_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrh.s32 q0, [r0]
; CHECK-NEXT:    vqdmullb.s16 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB7_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant i16 splat of the truncated scalar, defined outside the loop.
  %conv = trunc i32 %c0 to i16
  %.splatinsert = insertelement <8 x i16> undef, i16 %conv, i32 0
  %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast i32* %s1.addr.013 to <4 x i16>*
  %2 = tail call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %1, i32 2, <4 x i1> %0, <4 x i16> zeroinitializer)
  ; Widen the loaded halfwords, then reinterpret as <8 x i16> for the intrinsic.
  %3 = sext <4 x i16> %2 to <4 x i32>
  %4 = bitcast <4 x i32> %3 to <8 x i16>
  %5 = tail call <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16> %4, <8 x i16> %.splat, i32 0, <4 x i1> %0, <4 x i32> %3)
  %6 = bitcast i32* %s1.addr.013 to <4 x i32>*
  tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %5, <4 x i32>* %6, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vqdmulh: predicated saturating doubling multiply-high by a splatted scalar.
; The splat of %c0 lives in the preheader and is used by
; llvm.arm.mve.qdmulh.predicated in the loop. The expected output is
; "vqdmulh.s32 q0, q0, r1" inside a dlstp/letp loop.
define void @vqdmulh(i32* %s1, i32 %c0, i32 %N) {
; CHECK-LABEL: vqdmulh:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB8_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB8_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vqdmulh.s32 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB8_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
  %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, <4 x i1> %0, <4 x i32> %2)
  tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vqrdmulh: predicated saturating rounding doubling multiply-high by a
; splatted scalar. The splat of %c0 lives in the preheader and is used by
; llvm.arm.mve.qrdmulh.predicated in the loop. The expected output is
; "vqrdmulh.s32 q0, q0, r1" inside a dlstp/letp loop.
define void @vqrdmulh(i32* %s1, i32 %c0, i32 %N) {
; CHECK-LABEL: vqrdmulh:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB9_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB9_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vqrdmulh.s32 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB9_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x i32> undef, i32 %c0, i32 0
  %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi i32* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast i32* %s1.addr.013 to <4 x i32>*
  %2 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %0, <4 x i32> zeroinitializer)
  %3 = tail call <4 x i32> @llvm.arm.mve.qrdmulh.predicated.v4i32.v4i1(<4 x i32> %2, <4 x i32> %.splat, <4 x i1> %0, <4 x i32> %2)
  tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %3, <4 x i32>* %1, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds i32, i32* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vaddf: predicated f32 add of a splatted scalar.
; The splat of %c0 lives in the preheader and is used by
; llvm.arm.mve.add.predicated (v4f32) in the loop. The expected output is
; "vadd.f32 q0, q0, r1" inside a dlstp/letp loop.
define void @vaddf(float* %s1, float %c0, i32 %N) {
; CHECK-LABEL: vaddf:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB10_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB10_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vadd.f32 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB10_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x float> undef, float %c0, i32 0
  %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast float* %s1.addr.013 to <4 x float>*
  %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
  %3 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> %.splat, <4 x i1> %0, <4 x float> %2)
  tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %1, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds float, float* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vsubf: predicated f32 subtract of a splatted scalar.
; The splat of %c0 lives in the preheader and is used by
; llvm.arm.mve.sub.predicated (v4f32) in the loop. The expected output is
; "vsub.f32 q0, q0, r1" inside a dlstp/letp loop.
define void @vsubf(float* %s1, float %c0, i32 %N) {
; CHECK-LABEL: vsubf:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB11_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB11_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vsub.f32 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB11_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x float> undef, float %c0, i32 0
  %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast float* %s1.addr.013 to <4 x float>*
  %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
  %3 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> %.splat, <4 x i1> %0, <4 x float> %2)
  tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %1, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds float, float* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vmulf: predicated f32 multiply by a splatted scalar.
; The splat of %c0 lives in the preheader and is used by
; llvm.arm.mve.mul.predicated (v4f32) in the loop. The expected output is
; "vmul.f32 q0, q0, r1" inside a dlstp/letp loop.
define void @vmulf(float* %s1, float %c0, i32 %N) {
; CHECK-LABEL: vmulf:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB12_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB12_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmul.f32 q0, q0, r1
; CHECK-NEXT:    vstrw.32 q0, [r0], #16
; CHECK-NEXT:    letp lr, .LBB12_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp11 = icmp sgt i32 %N, 0
  br i1 %cmp11, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x float> undef, float %c0, i32 0
  %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.013 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.012 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.012)
  %1 = bitcast float* %s1.addr.013 to <4 x float>*
  %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer)
  %3 = tail call fast <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float> %2, <4 x float> %.splat, <4 x i1> %0, <4 x float> %2)
  tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %3, <4 x float>* %1, i32 4, <4 x i1> %0)
  ; Advance by four elements per iteration.
  %add.ptr = getelementptr inbounds float, float* %s1.addr.013, i32 4
  %sub = add nsw i32 %N.addr.012, -4
  %cmp = icmp sgt i32 %N.addr.012, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vfma: predicated f32 fused multiply-add where the splatted scalar is the
; second operand of llvm.arm.mve.fma.predicated (the load from %s2 times the
; splat, with the load from %s1 as the third operand). The expected output is
; "vfma.f32 q1, q0, r2" inside a dlstp/letp loop.
; The %s2 pointer is cast once in the preheader and is not advanced in the loop.
define void @vfma(float* %s1, float* %s2, float %c0, i32 %N) {
; CHECK-LABEL: vfma:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r3, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB13_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r3
; CHECK-NEXT:  .LBB13_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vfma.f32 q1, q0, r2
; CHECK-NEXT:    vstrw.32 q1, [r0], #16
; CHECK-NEXT:    letp lr, .LBB13_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp12 = icmp sgt i32 %N, 0
  br i1 %cmp12, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  %0 = bitcast float* %s2 to <4 x float>*
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x float> undef, float %c0, i32 0
  %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.014 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.013 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %1 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.013)
  %2 = bitcast float* %s1.addr.014 to <4 x float>*
  %3 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
  %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
  %5 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %4, <4 x float> %.splat, <4 x float> %3, <4 x i1> %1)
  tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %5, <4 x float>* %2, i32 4, <4 x i1> %1)
  ; Advance %s1 by four elements per iteration.
  %add.ptr = getelementptr inbounds float, float* %s1.addr.014, i32 4
  %sub = add nsw i32 %N.addr.013, -4
  %cmp = icmp sgt i32 %N.addr.013, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; vfmas: predicated f32 fused multiply-add where the splatted scalar is the
; third operand of llvm.arm.mve.fma.predicated (the two loaded vectors are the
; first and second operands). The expected output is "vfmas.f32 q1, q0, r2"
; inside a dlstp/letp loop.
; The %s2 pointer is cast once in the preheader and is not advanced in the loop.
define void @vfmas(float* %s1, float* %s2, float %c0, i32 %N) {
; CHECK-LABEL: vfmas:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r3, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB14_1: @ %while.body.lr.ph
; CHECK-NEXT:    dlstp.32 lr, r3
; CHECK-NEXT:  .LBB14_2: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vfmas.f32 q1, q0, r2
; CHECK-NEXT:    vstrw.32 q1, [r0], #16
; CHECK-NEXT:    letp lr, .LBB14_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r7, pc}
entry:
  %cmp12 = icmp sgt i32 %N, 0
  br i1 %cmp12, label %while.body.lr.ph, label %while.end

while.body.lr.ph: ; preds = %entry
  %0 = bitcast float* %s2 to <4 x float>*
  ; Loop-invariant splat of the scalar operand, defined outside the loop.
  %.splatinsert = insertelement <4 x float> undef, float %c0, i32 0
  %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
  br label %while.body

while.body: ; preds = %while.body.lr.ph, %while.body
  %s1.addr.014 = phi float* [ %s1, %while.body.lr.ph ], [ %add.ptr, %while.body ]
  %N.addr.013 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
  ; Lane predicate derived from the remaining element count.
  %1 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %N.addr.013)
  %2 = bitcast float* %s1.addr.014 to <4 x float>*
  %3 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %2, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
  %4 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %1, <4 x float> zeroinitializer)
  %5 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %3, <4 x float> %4, <4 x float> %.splat, <4 x i1> %1)
  tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %5, <4 x float>* %2, i32 4, <4 x i1> %1)
  ; Advance %s1 by four elements per iteration.
  %add.ptr = getelementptr inbounds float, float* %s1.addr.014, i32 4
  %sub = add nsw i32 %N.addr.013, -4
  %cmp = icmp sgt i32 %N.addr.013, 4
  br i1 %cmp, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret void
}

; Declarations of the intrinsics called by the test functions in this file:
; the vctp32 predicate generator, masked loads/stores, and the predicated MVE
; arithmetic intrinsics.
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare <4 x i32> @llvm.arm.mve.add.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare <4 x i32> @llvm.arm.mve.sub.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
declare <4 x i32> @llvm.arm.mve.mul.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
declare <4 x i32> @llvm.arm.mve.qadd.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>)
declare <4 x i32> @llvm.arm.mve.qsub.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>)
declare <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>)
declare <4 x i32> @llvm.arm.mve.hsub.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>)
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare <4 x i32> @llvm.arm.mve.vqdmull.predicated.v4i32.v8i16.v4i1(<8 x i16>, <8 x i16>, i32, <4 x i1>, <4 x i32>)
declare <4 x i32> @llvm.arm.mve.qdmulh.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
declare <4 x i32> @llvm.arm.mve.qrdmulh.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>)
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>)
declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
declare <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>)
declare <4 x float> @llvm.arm.mve.mul.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>)
declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>)