1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00
Simon Tatham 7180efac21 [ARM,MVE] Support immediate vbicq,vorrq,vmvnq intrinsics.
Summary:
Immediate vmvnq is code-generated as a simple vector constant in IR,
and left to the backend to recognize that it can be created with an
MVE VMVN instruction. The predicated version is represented as a
select between the input and the same constant, and I've added a
Tablegen isel rule to turn that into a predicated VMVN. (That should
be better than the previous VMVN + VPSEL: it's the same number of
instructions but now it can fold into an adjacent VPT block.)

The unpredicated forms of VBIC and VORR are done by enabling the same
isel lowering as for NEON, recognizing appropriate immediates and
rewriting them as ARMISD::VBICIMM / ARMISD::VORRIMM SDNodes, which I
then instruction-select into the right MVE instructions (now that I've
also reworked those instructions to use the same MC operand encoding).
In order to do that, I had to promote the Tablegen SDNode instance
`NEONvorrImm` to a general `ARMvorrImm` available in MVE as well, and
similarly for `NEONvbicImm`.

The predicated forms of VBIC and VORR are represented as a vector
select between the original input vector and the output of the
unpredicated operation. The main convenience of this is that it still
lets me use the existing isel lowering for VBICIMM/VORRIMM, and not
have to write another copy of the operand encoding translation code.

This intrinsic family is the first to use the `imm_simd` system I put
into the MveEmitter tablegen backend. So, naturally, it showed up a
bug or two (emitting bogus range checks and the like). Fixed those,
and added a full set of tests for the permissible immediates in the
existing Sema test.

Also adjusted the isel pattern for `vmovlb.u8`, which stopped matching
because lowering started turning its input into a VBICIMM. Now it
recognizes the VBICIMM instead.

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D72934
2020-01-23 11:53:52 +00:00

366 lines
12 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
; Unpredicated bit-clear, 16-bit lanes, immediate byte in bits [7:0]:
; AND with the splat of -101 (== ~0x64) is expected to become one VBIC immediate.
define arm_aapcs_vfpcc <8 x i16> @test_vbicq_n_u16_sh0(<8 x i16> %a) {
; CHECK-LABEL: test_vbicq_n_u16_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic.i16 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %cleared = and <8 x i16> %a, <i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101>
  ret <8 x i16> %cleared
}
; Unpredicated bit-clear, 16-bit lanes, immediate byte in bits [15:8]:
; AND with the splat of -25601 (== ~0x6400) is expected to become one VBIC immediate.
define arm_aapcs_vfpcc <8 x i16> @test_vbicq_n_u16_sh8(<8 x i16> %a) {
; CHECK-LABEL: test_vbicq_n_u16_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic.i16 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %cleared = and <8 x i16> %a, <i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601>
  ret <8 x i16> %cleared
}
; Unpredicated bit-clear, 32-bit lanes, immediate byte in bits [7:0]:
; AND with the splat of -101 (== ~0x64) is expected to become one VBIC immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh0(<4 x i32> %a) {
; CHECK-LABEL: test_vbicq_n_u32_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic.i32 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %cleared = and <4 x i32> %a, <i32 -101, i32 -101, i32 -101, i32 -101>
  ret <4 x i32> %cleared
}
; Unpredicated bit-clear, 32-bit lanes, immediate byte in bits [15:8]:
; AND with the splat of -25601 (== ~0x6400) is expected to become one VBIC immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh8(<4 x i32> %a) {
; CHECK-LABEL: test_vbicq_n_u32_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic.i32 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %cleared = and <4 x i32> %a, <i32 -25601, i32 -25601, i32 -25601, i32 -25601>
  ret <4 x i32> %cleared
}
; Unpredicated bit-clear, 32-bit lanes, immediate byte in bits [23:16]:
; AND with the splat of -6553601 (== ~0x640000) is expected to become one VBIC immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh16(<4 x i32> %a) {
; CHECK-LABEL: test_vbicq_n_u32_sh16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic.i32 q0, #0x640000
; CHECK-NEXT: bx lr
entry:
  %cleared = and <4 x i32> %a, <i32 -6553601, i32 -6553601, i32 -6553601, i32 -6553601>
  ret <4 x i32> %cleared
}
; Unpredicated bit-clear, 32-bit lanes, immediate byte in bits [31:24]:
; AND with the splat of -1677721601 (== ~0x64000000) is expected to become one VBIC immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh24(<4 x i32> %a) {
; CHECK-LABEL: test_vbicq_n_u32_sh24:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic.i32 q0, #0x64000000
; CHECK-NEXT: bx lr
entry:
  %cleared = and <4 x i32> %a, <i32 -1677721601, i32 -1677721601, i32 -1677721601, i32 -1677721601>
  ret <4 x i32> %cleared
}
; The immediate in this case is legal for a VMVN but not for a VBIC,
; so we expect to see the constant being prepared in another register
; and then combined with the input by a plain VAND.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_illegal(<4 x i32> %a) {
; CHECK-LABEL: test_vbicq_n_u32_illegal:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q1, #0x54ff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  ; -21760 == ~0x54ff: the mask is built in q1 and applied with a register-register AND.
  %masked = and <4 x i32> %a, <i32 -21760, i32 -21760, i32 -21760, i32 -21760>
  ret <4 x i32> %masked
}
; Unpredicated bit-set, 16-bit lanes, immediate byte in bits [7:0]:
; OR with the splat of 100 (== 0x64) is expected to become one VORR immediate.
define arm_aapcs_vfpcc <8 x i16> @test_vorrq_n_u16_sh0(<8 x i16> %a) {
; CHECK-LABEL: test_vorrq_n_u16_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr.i16 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %merged = or <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
  ret <8 x i16> %merged
}
; Unpredicated bit-set, 16-bit lanes, immediate byte in bits [15:8]:
; OR with the splat of 25600 (== 0x6400) is expected to become one VORR immediate.
define arm_aapcs_vfpcc <8 x i16> @test_vorrq_n_u16_sh8(<8 x i16> %a) {
; CHECK-LABEL: test_vorrq_n_u16_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr.i16 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %merged = or <8 x i16> %a, <i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600>
  ret <8 x i16> %merged
}
; Unpredicated bit-set, 32-bit lanes, immediate byte in bits [7:0]:
; OR with the splat of 100 (== 0x64) is expected to become one VORR immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh0(<4 x i32> %a) {
; CHECK-LABEL: test_vorrq_n_u32_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr.i32 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %merged = or <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
  ret <4 x i32> %merged
}
; Unpredicated bit-set, 32-bit lanes, immediate byte in bits [15:8]:
; OR with the splat of 25600 (== 0x6400) is expected to become one VORR immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh8(<4 x i32> %a) {
; CHECK-LABEL: test_vorrq_n_u32_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr.i32 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %merged = or <4 x i32> %a, <i32 25600, i32 25600, i32 25600, i32 25600>
  ret <4 x i32> %merged
}
; Unpredicated bit-set, 32-bit lanes, immediate byte in bits [23:16]:
; OR with the splat of 6553600 (== 0x640000) is expected to become one VORR immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh16(<4 x i32> %a) {
; CHECK-LABEL: test_vorrq_n_u32_sh16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr.i32 q0, #0x640000
; CHECK-NEXT: bx lr
entry:
  %merged = or <4 x i32> %a, <i32 6553600, i32 6553600, i32 6553600, i32 6553600>
  ret <4 x i32> %merged
}
; Unpredicated bit-set, 32-bit lanes, immediate byte in bits [31:24]:
; OR with the splat of 1677721600 (== 0x64000000) is expected to become one VORR immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh24(<4 x i32> %a) {
; CHECK-LABEL: test_vorrq_n_u32_sh24:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr.i32 q0, #0x64000000
; CHECK-NEXT: bx lr
entry:
  %merged = or <4 x i32> %a, <i32 1677721600, i32 1677721600, i32 1677721600, i32 1677721600>
  ret <4 x i32> %merged
}
; Predicated bit-clear, 16-bit lanes, mask ~0x64: lanes whose predicate bit
; (derived from %p) is true take %a & -101, the remaining lanes keep %a.
; Expected to fold into a VPST block containing one predicated VBIC immediate.
define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_n_u16_sh0(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vbicq_m_n_u16_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict.i16 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %cleared = and <8 x i16> %a, <i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101>
  %res = select <8 x i1> %pred, <8 x i16> %cleared, <8 x i16> %a
  ret <8 x i16> %res
}
; Predicated bit-clear, 16-bit lanes, mask ~0x6400: lanes whose predicate bit
; (derived from %p) is true take %a & -25601, the remaining lanes keep %a.
; Expected to fold into a VPST block containing one predicated VBIC immediate.
define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_n_u16_sh8(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vbicq_m_n_u16_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict.i16 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %cleared = and <8 x i16> %a, <i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601>
  %res = select <8 x i1> %pred, <8 x i16> %cleared, <8 x i16> %a
  ret <8 x i16> %res
}
; Predicated bit-clear, 32-bit lanes, mask ~0x64: lanes whose predicate bit
; (derived from %p) is true take %a & -101, the remaining lanes keep %a.
; Expected to fold into a VPST block containing one predicated VBIC immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh0(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vbicq_m_n_u32_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict.i32 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %cleared = and <4 x i32> %a, <i32 -101, i32 -101, i32 -101, i32 -101>
  %res = select <4 x i1> %pred, <4 x i32> %cleared, <4 x i32> %a
  ret <4 x i32> %res
}
; Predicated bit-clear, 32-bit lanes, mask ~0x6400: lanes whose predicate bit
; (derived from %p) is true take %a & -25601, the remaining lanes keep %a.
; Expected to fold into a VPST block containing one predicated VBIC immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh8(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vbicq_m_n_u32_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict.i32 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %cleared = and <4 x i32> %a, <i32 -25601, i32 -25601, i32 -25601, i32 -25601>
  %res = select <4 x i1> %pred, <4 x i32> %cleared, <4 x i32> %a
  ret <4 x i32> %res
}
; Predicated bit-clear, 32-bit lanes, mask ~0x640000: lanes whose predicate bit
; (derived from %p) is true take %a & -6553601, the remaining lanes keep %a.
; Expected to fold into a VPST block containing one predicated VBIC immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh16(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vbicq_m_n_u32_sh16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict.i32 q0, #0x640000
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %cleared = and <4 x i32> %a, <i32 -6553601, i32 -6553601, i32 -6553601, i32 -6553601>
  %res = select <4 x i1> %pred, <4 x i32> %cleared, <4 x i32> %a
  ret <4 x i32> %res
}
; Predicated bit-clear, 32-bit lanes, mask ~0x64000000: lanes whose predicate bit
; (derived from %p) is true take %a & -1677721601, the remaining lanes keep %a.
; Expected to fold into a VPST block containing one predicated VBIC immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh24(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vbicq_m_n_u32_sh24:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vbict.i32 q0, #0x64000000
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %cleared = and <4 x i32> %a, <i32 -1677721601, i32 -1677721601, i32 -1677721601, i32 -1677721601>
  %res = select <4 x i1> %pred, <4 x i32> %cleared, <4 x i32> %a
  ret <4 x i32> %res
}
; Predicated bit-set, 16-bit lanes, immediate 0x64: lanes whose predicate bit
; (derived from %p) is true take %a | 100, the remaining lanes keep %a.
; Expected to fold into a VPST block containing one predicated VORR immediate.
define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_n_u16_sh0(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vorrq_m_n_u16_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt.i16 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %merged = or <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
  %res = select <8 x i1> %pred, <8 x i16> %merged, <8 x i16> %a
  ret <8 x i16> %res
}
; Predicated bit-set, 16-bit lanes, immediate 0x6400: lanes whose predicate bit
; (derived from %p) is true take %a | 25600, the remaining lanes keep %a.
; Expected to fold into a VPST block containing one predicated VORR immediate.
define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_n_u16_sh8(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vorrq_m_n_u16_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt.i16 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %merged = or <8 x i16> %a, <i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600>
  %res = select <8 x i1> %pred, <8 x i16> %merged, <8 x i16> %a
  ret <8 x i16> %res
}
; Predicated bit-set, 32-bit lanes, immediate 0x64: lanes whose predicate bit
; (derived from %p) is true take %a | 100, the remaining lanes keep %a.
; Expected to fold into a VPST block containing one predicated VORR immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh0(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vorrq_m_n_u32_sh0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt.i32 q0, #0x64
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %merged = or <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
  %res = select <4 x i1> %pred, <4 x i32> %merged, <4 x i32> %a
  ret <4 x i32> %res
}
; Predicated bit-set, 32-bit lanes, immediate 0x6400: lanes whose predicate bit
; (derived from %p) is true take %a | 25600, the remaining lanes keep %a.
; Expected to fold into a VPST block containing one predicated VORR immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh8(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vorrq_m_n_u32_sh8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt.i32 q0, #0x6400
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %merged = or <4 x i32> %a, <i32 25600, i32 25600, i32 25600, i32 25600>
  %res = select <4 x i1> %pred, <4 x i32> %merged, <4 x i32> %a
  ret <4 x i32> %res
}
; Predicated bit-set, 32-bit lanes, immediate 0x640000: lanes whose predicate bit
; (derived from %p) is true take %a | 6553600, the remaining lanes keep %a.
; Expected to fold into a VPST block containing one predicated VORR immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh16(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vorrq_m_n_u32_sh16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt.i32 q0, #0x640000
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %merged = or <4 x i32> %a, <i32 6553600, i32 6553600, i32 6553600, i32 6553600>
  %res = select <4 x i1> %pred, <4 x i32> %merged, <4 x i32> %a
  ret <4 x i32> %res
}
; Predicated bit-set, 32-bit lanes, immediate 0x64000000: lanes whose predicate bit
; (derived from %p) is true take %a | 1677721600, the remaining lanes keep %a.
; Expected to fold into a VPST block containing one predicated VORR immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh24(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vorrq_m_n_u32_sh24:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vorrt.i32 q0, #0x64000000
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %merged = or <4 x i32> %a, <i32 1677721600, i32 1677721600, i32 1677721600, i32 1677721600>
  %res = select <4 x i1> %pred, <4 x i32> %merged, <4 x i32> %a
  ret <4 x i32> %res
}
; Unpredicated VMVN immediate, 16-bit lanes: returning a splat of ~0xaa00 is
; expected to be materialised by a single VMVN with immediate 0xaa00.
; The lane value is spelled 22015 (0x55ff, i.e. ~0xaa00 in 16 bits) so that the
; literal lies inside the i16 range and does not depend on the IR parser
; truncating an over-wide constant.
define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_n_u16() {
; CHECK-LABEL: test_vmvnq_n_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i16 q0, #0xaa00
; CHECK-NEXT: bx lr
entry:
  ret <8 x i16> <i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015>
}
; Unpredicated VMVN immediate, 32-bit lanes: returning a splat of -43521
; (== ~0xaa00) is expected to be materialised by a single VMVN with
; immediate 0xaa00.
define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_n_u32() {
; CHECK-LABEL: test_vmvnq_n_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn.i32 q0, #0xaa00
; CHECK-NEXT: bx lr
entry:
ret <4 x i32> <i32 -43521, i32 -43521, i32 -43521, i32 -43521>
}
; Predicated VMVN immediate, 16-bit lanes: lanes whose predicate bit (derived
; from %p) is true receive ~0xaa00, the remaining lanes keep %inactive.
; Expected to fold into a VPST block containing one predicated VMVN immediate.
; The lane value is spelled 22015 (0x55ff, i.e. ~0xaa00 in 16 bits) so that the
; literal lies inside the i16 range and does not depend on the IR parser
; truncating an over-wide constant.
define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_n_u16(<8 x i16> %inactive, i16 zeroext %p) {
; CHECK-LABEL: test_vmvnq_m_n_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmvnt.i16 q0, #0xaa00
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = select <8 x i1> %pred, <8 x i16> <i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015>, <8 x i16> %inactive
  ret <8 x i16> %res
}
; Predicated VMVN immediate, 32-bit lanes: lanes whose predicate bit (derived
; from %p) is true receive -43521 (== ~0xaa00), the remaining lanes keep %inactive.
; Expected to fold into a VPST block containing one predicated VMVN immediate.
define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_n_u32(<4 x i32> %inactive, i16 zeroext %p) {
; CHECK-LABEL: test_vmvnq_m_n_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vmvnt.i32 q0, #0xaa00
; CHECK-NEXT: bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = select <4 x i1> %pred, <4 x i32> <i32 -43521, i32 -43521, i32 -43521, i32 -43521>, <4 x i32> %inactive
  ret <4 x i32> %res
}
; Intrinsics used by the predicated tests in this file: each takes an i32 and
; returns a vector of i1 (8 lanes for the 16-bit tests, 4 lanes for the 32-bit
; tests) that serves as the condition operand of the selects.
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)