mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
60e89f6b5d
Summary: Running an end-to-end test last week I noticed that a lot of the ACLE intrinsics that operate differently on vectors of signed and unsigned integers were ending up generating the signed version of the instruction unconditionally. This is because the IR intrinsics had no way to distinguish signed from unsigned: the LLVM type system just calls them both `v8i16` (or whatever), so you need either separate intrinsics for signed and unsigned, or a flag parameter that tells ISel which one to choose. This patch fixes all the problems of that kind that I've noticed, by adding an i32 flag parameter to many of the IR intrinsics which is set to 1 for unsigned (matching the existing practice in cases where we got it right), and conditioning all the isel patterns on that flag. So the fundamental change is in `IntrinsicsARM.td`, changing the low-level IR intrinsics API; there are knock-on changes in `arm_mve.td` (adjusting code gen for the ACLE intrinsics to use the modified API) and in `ARMInstrMVE.td` (adjusting isel to expect the new unsigned flags). The rest of this patch is boringly updating tests. Reviewers: dmgreen, miyuki, MarkMurrayARM Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D72270
136 lines
5.2 KiB
LLVM
136 lines
5.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Unpredicated form on <16 x i8>: calls @llvm.arm.mve.vrhadd.v16i8 with the
; trailing i32 flag set to 1 (unsigned) and expects llc to select vrhadd.u8.
define arm_aapcs_vfpcc <16 x i8> @test_vrhaddq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrhadd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <16 x i8> @llvm.arm.mve.vrhadd.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1)
  ret <16 x i8> %0
}

; Unpredicated intrinsic on <16 x i8>: two vector operands followed by an i32
; flag (this file passes 1 in the _u tests and 0 in the _s tests).
declare <16 x i8> @llvm.arm.mve.vrhadd.v16i8(<16 x i8>, <16 x i8>, i32) #1

; Unpredicated form on <8 x i16>: calls @llvm.arm.mve.vrhadd.v8i16 with the
; trailing i32 flag set to 0 (signed) and expects llc to select vrhadd.s16.
define arm_aapcs_vfpcc <8 x i16> @test_vrhaddq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrhadd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x i16> @llvm.arm.mve.vrhadd.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0)
  ret <8 x i16> %0
}

; Unpredicated intrinsic on <8 x i16>: two vector operands followed by an i32
; flag (this file passes 1 in the _u tests and 0 in the _s tests).
declare <8 x i16> @llvm.arm.mve.vrhadd.v8i16(<8 x i16>, <8 x i16>, i32) #1

; Unpredicated form on <4 x i32>: calls @llvm.arm.mve.vrhadd.v4i32 with the
; trailing i32 flag set to 1 (unsigned) and expects llc to select vrhadd.u32.
define arm_aapcs_vfpcc <4 x i32> @test_vrhaddq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrhadd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x i32> @llvm.arm.mve.vrhadd.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1)
  ret <4 x i32> %0
}

; Unpredicated intrinsic on <4 x i32>: two vector operands followed by an i32
; flag (this file passes 1 in the _u tests and 0 in the _s tests).
declare <4 x i32> @llvm.arm.mve.vrhadd.v4i32(<4 x i32>, <4 x i32>, i32) #1

; Predicated form on <16 x i8> with an explicit %inactive vector: %p is
; zero-extended to i32, turned into a <16 x i1> value by pred.i2v, and passed
; to rhadd.predicated with flag 0 (signed) and %inactive as the last operand.
; Expected code: vmsr p0, r0 / vpst / vrhaddt.s8 q0, q1, q2.
define arm_aapcs_vfpcc <16 x i8> @test_vrhaddq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrhaddt.s8 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.rhadd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, <16 x i1> %1, <16 x i8> %inactive)
  ret <16 x i8> %2
}

; Takes an i32 and returns <16 x i1>; the predicated <16 x i8> tests pass its
; result as the <16 x i1> operand of rhadd.predicated.
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #1

; Predicated intrinsic on <16 x i8>: two vector operands, an i32 flag (1 in
; the _u tests, 0 in the _s tests), a <16 x i1> predicate, and a final vector
; operand that the tests set to either %inactive or undef.
declare <16 x i8> @llvm.arm.mve.rhadd.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>, <16 x i8>) #1

; Predicated form on <8 x i16> with an explicit %inactive vector: %p is
; zero-extended to i32, turned into a <8 x i1> value by pred.i2v, and passed
; to rhadd.predicated with flag 1 (unsigned) and %inactive as the last operand.
; Expected code: vmsr p0, r0 / vpst / vrhaddt.u16 q0, q1, q2.
define arm_aapcs_vfpcc <8 x i16> @test_vrhaddq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_m_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrhaddt.u16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.rhadd.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, <8 x i1> %1, <8 x i16> %inactive)
  ret <8 x i16> %2
}

; Takes an i32 and returns <8 x i1>; the predicated <8 x i16> tests pass its
; result as the <8 x i1> operand of rhadd.predicated.
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #1

; Predicated intrinsic on <8 x i16>: two vector operands, an i32 flag (1 in
; the _u tests, 0 in the _s tests), a <8 x i1> predicate, and a final vector
; operand that the tests set to either %inactive or undef.
declare <8 x i16> @llvm.arm.mve.rhadd.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>, <8 x i16>) #1

; Predicated form on <4 x i32> with an explicit %inactive vector: %p is
; zero-extended to i32, turned into a <4 x i1> value by pred.i2v, and passed
; to rhadd.predicated with flag 0 (signed) and %inactive as the last operand.
; Expected code: vmsr p0, r0 / vpst / vrhaddt.s32 q0, q1, q2.
define arm_aapcs_vfpcc <4 x i32> @test_vrhaddq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrhaddt.s32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.rhadd.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1, <4 x i32> %inactive)
  ret <4 x i32> %2
}

; Takes an i32 and returns <4 x i1>; the predicated <4 x i32> tests pass its
; result as the <4 x i1> operand of rhadd.predicated.
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #1

; Predicated intrinsic on <4 x i32>: two vector operands, an i32 flag (1 in
; the _u tests, 0 in the _s tests), a <4 x i1> predicate, and a final vector
; operand that the tests set to either %inactive or undef.
declare <4 x i32> @llvm.arm.mve.rhadd.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1

; Predicated form on <16 x i8> without an inactive vector: same call sequence
; as the _m tests, but the last operand of rhadd.predicated is undef and the
; flag is 1 (unsigned). Expected code: vmsr p0, r0 / vpst /
; vrhaddt.u8 q0, q0, q1.
define arm_aapcs_vfpcc <16 x i8> @test_vrhaddq_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_x_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrhaddt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call <16 x i8> @llvm.arm.mve.rhadd.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, <16 x i1> %1, <16 x i8> undef)
  ret <16 x i8> %2
}

; Predicated form on <8 x i16> without an inactive vector: same call sequence
; as the _m tests, but the last operand of rhadd.predicated is undef and the
; flag is 1 (unsigned). Expected code: vmsr p0, r0 / vpst /
; vrhaddt.u16 q0, q0, q1.
define arm_aapcs_vfpcc <8 x i16> @test_vrhaddq_x_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_x_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrhaddt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call <8 x i16> @llvm.arm.mve.rhadd.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, <8 x i1> %1, <8 x i16> undef)
  ret <8 x i16> %2
}

; Predicated form on <4 x i32> without an inactive vector: same call sequence
; as the _m tests, but the last operand of rhadd.predicated is undef and the
; flag is 1 (unsigned). Expected code: vmsr p0, r0 / vpst /
; vrhaddt.u32 q0, q0, q1.
define arm_aapcs_vfpcc <4 x i32> @test_vrhaddq_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #0 {
; CHECK-LABEL: test_vrhaddq_x_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrhaddt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <4 x i32> @llvm.arm.mve.rhadd.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, <4 x i1> %1, <4 x i32> undef)
  ret <4 x i32> %2
}