llvm-mirror/test/CodeGen/Thumb2/mve-intrinsics/vandq.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Unpredicated AND on <16 x i8>: a plain IR `and` of %b with %a. The
; assertions below expect a single `vand q0, q1, q0` followed by the return.
define arm_aapcs_vfpcc <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vandq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %and = and <16 x i8> %b, %a
  ret <16 x i8> %and
}

; Unpredicated AND on <8 x i16>: a plain IR `and` of %b with %a. The
; assertions below expect a single `vand q0, q1, q0` followed by the return.
define arm_aapcs_vfpcc <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vandq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %and = and <8 x i16> %b, %a
  ret <8 x i16> %and
}

; Unpredicated AND on <4 x i32>: a plain IR `and` of %b with %a. The
; assertions below expect a single `vand q0, q1, q0` followed by the return.
define arm_aapcs_vfpcc <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vandq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %and = and <4 x i32> %b, %a
  ret <4 x i32> %and
}

; Unpredicated AND on <4 x float>: both inputs are bitcast to <4 x i32>,
; combined with an integer `and`, and the result is bitcast back to float.
; The assertions below expect the casts to cost nothing: one `vand` and the
; return.
define arm_aapcs_vfpcc <4 x float> @test_vandq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 {
; CHECK-LABEL: test_vandq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %a.bits = bitcast <4 x float> %a to <4 x i32>
  %b.bits = bitcast <4 x float> %b to <4 x i32>
  %and = and <4 x i32> %b.bits, %a.bits
  %res = bitcast <4 x i32> %and to <4 x float>
  ret <4 x float> %res
}

; Predicated AND on <16 x i8>. %p is zero-extended to i32 and converted to a
; <16 x i1> value by the pred.i2v intrinsic; that value and %inactive are then
; passed as the third and fourth operands of the and.predicated intrinsic.
; The assertions below expect `vmsr p0, r0`, `vpst`, then
; `vandt q0, q1, q2` and the return.
define arm_aapcs_vfpcc <16 x i8> @test_vandq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vandq_m_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.and.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %pred, <16 x i8> %inactive)
  ret <16 x i8> %res
}

; Intrinsic called by the 16-lane tests in this file with the zero-extended
; i32 form of %p; it returns a <16 x i1> value.
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2

; Predicated AND intrinsic for <16 x i8>. As used by the callers in this file,
; the operands are: the two data vectors, the <16 x i1> value, and finally
; either %inactive or undef.
declare <16 x i8> @llvm.arm.mve.and.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2

; Predicated AND on <8 x i16>. %p is zero-extended to i32 and converted to a
; <8 x i1> value by the pred.i2v intrinsic; that value and %inactive are then
; passed as the third and fourth operands of the and.predicated intrinsic.
; The assertions below expect `vmsr p0, r0`, `vpst`, then
; `vandt q0, q1, q2` and the return.
define arm_aapcs_vfpcc <8 x i16> @test_vandq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vandq_m_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %pred, <8 x i16> %inactive)
  ret <8 x i16> %res
}

; Intrinsic called by the 8-lane tests in this file with the zero-extended
; i32 form of %p; it returns a <8 x i1> value.
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2

; Predicated AND intrinsic for <8 x i16>. As used by the callers in this file,
; the operands are: the two data vectors, the <8 x i1> value, and finally
; either an inactive-lanes vector or undef.
declare <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2

; Predicated AND on <4 x i32>. %p is zero-extended to i32 and converted to a
; <4 x i1> value by the pred.i2v intrinsic; that value and %inactive are then
; passed as the third and fourth operands of the and.predicated intrinsic.
; The assertions below expect `vmsr p0, r0`, `vpst`, then
; `vandt q0, q1, q2` and the return.
define arm_aapcs_vfpcc <4 x i32> @test_vandq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vandq_m_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.and.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %pred, <4 x i32> %inactive)
  ret <4 x i32> %res
}

; Intrinsic called by the 4-lane tests in this file with the zero-extended
; i32 form of %p; it returns a <4 x i1> value.
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2

; Predicated AND intrinsic for <4 x i32>. As used by the callers in this file,
; the operands are: the two data vectors, the <4 x i1> value, and finally
; either %inactive or undef.
declare <4 x i32> @llvm.arm.mve.and.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2

; Predicated AND on <8 x half>. %a, %b and %inactive are each bitcast to
; <8 x i16> so the integer and.predicated intrinsic can be used, and the
; result is bitcast back to <8 x half>. The assertions below expect the same
; sequence as the integer `_m_` tests: `vmsr p0, r0`, `vpst`,
; `vandt q0, q1, q2`, return.
define arm_aapcs_vfpcc <8 x half> @test_vandq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vandq_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %a.bits = bitcast <8 x half> %a to <8 x i16>
  %b.bits = bitcast <8 x half> %b to <8 x i16>
  %p.wide = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %inactive.bits = bitcast <8 x half> %inactive to <8 x i16>
  %and = tail call <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16> %a.bits, <8 x i16> %b.bits, <8 x i1> %pred, <8 x i16> %inactive.bits)
  %res = bitcast <8 x i16> %and to <8 x half>
  ret <8 x half> %res
}

; Predicated AND on <16 x i8> with no %inactive parameter: the last operand
; of the and.predicated intrinsic is undef. The assertions below expect
; `vmsr p0, r0`, `vpst`, then `vandt q0, q0, q1` and the return.
define arm_aapcs_vfpcc <16 x i8> @test_vandq_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vandq_x_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.and.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %pred, <16 x i8> undef)
  ret <16 x i8> %res
}

; Predicated AND on <8 x i16> with no %inactive parameter: the last operand
; of the and.predicated intrinsic is undef. The assertions below expect
; `vmsr p0, r0`, `vpst`, then `vandt q0, q0, q1` and the return.
define arm_aapcs_vfpcc <8 x i16> @test_vandq_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vandq_x_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %pred, <8 x i16> undef)
  ret <8 x i16> %res
}

; Predicated AND on <4 x i32> with no %inactive parameter: the last operand
; of the and.predicated intrinsic is undef. The assertions below expect
; `vmsr p0, r0`, `vpst`, then `vandt q0, q0, q1` and the return.
define arm_aapcs_vfpcc <4 x i32> @test_vandq_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vandq_x_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.and.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %pred, <4 x i32> undef)
  ret <4 x i32> %res
}

; Predicated AND on <4 x float> with no %inactive parameter: %a and %b are
; bitcast to <4 x i32>, the and.predicated intrinsic is called with undef as
; its last operand, and the result is bitcast back to <4 x float>. The
; assertions below expect `vmsr p0, r0`, `vpst`, then `vandt q0, q0, q1` and
; the return.
; NOTE(review): despite the `_m_` in its name, this function has the same
; shape as the `_x_` tests in this file (no %inactive, undef last operand).
; The name is left unchanged because the label assertion depends on it;
; confirm whether `test_vandq_x_f32` was intended.
define arm_aapcs_vfpcc <4 x float> @test_vandq_m_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 {
; CHECK-LABEL: test_vandq_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %a.bits = bitcast <4 x float> %a to <4 x i32>
  %b.bits = bitcast <4 x float> %b to <4 x i32>
  %p.wide = zext i16 %p to i32
  %pred = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %and = tail call <4 x i32> @llvm.arm.mve.and.predicated.v4i32.v4i1(<4 x i32> %a.bits, <4 x i32> %b.bits, <4 x i1> %pred, <4 x i32> undef)
  %res = bitcast <4 x i32> %and to <4 x float>
  ret <4 x float> %res
}
[ARM][MVE][Intrinsics] Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Summary: Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Reviewers: simon_tatham, ostannard, dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70547 2019-11-15 12:30:15 +01:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s`

			`define arm_aapcs_vfpcc <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {`
			`; CHECK-LABEL: test_vandq_u8:`
			`; CHECK: @ %bb.0: @ %entry`
			`; CHECK-NEXT: vand q0, q1, q0`
			`; CHECK-NEXT: bx lr`
			`entry:`
			`%0 = and <16 x i8> %b, %a`
			`ret <16 x i8> %0`
			`}`

[ARM][MVE][Intrinsics] Add _x() variants of my _m() intrinsics. Summary: Better use of multiclass is used, and this helped find some existing bugs in the predicated VMULL* intrinsics, which are now fixed. The refactored VMULL[TB]Q_(INT\|POLY)_M() intrinsics were discovered to have an argument ("inactive") with incorrect type, and this required a fix that is included in this whole patch. The argument "inactive" should have been the same width (per vector element) as the return type of the intrinsic, but was not in the case where the return type was double the element width of the input types. To assist in testing the multiclassing , and to thwart further gremlins, the unit tests are improved in scope. The .ll tests are all generated by a small bit of throw-away scripting from the corresponding .c tests, and as such the diffs are large and nasty. Look at the file rather than the diff. Reviewers: dmgreen, miyuki, ostannard, simon_tatham Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71421 2019-12-11 18:53:12 +01:00			`define arm_aapcs_vfpcc <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr #0 {`
			`; CHECK-LABEL: test_vandq_s16:`
[ARM][MVE][Intrinsics] Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Summary: Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Reviewers: simon_tatham, ostannard, dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70547 2019-11-15 12:30:15 +01:00			`; CHECK: @ %bb.0: @ %entry`
			`; CHECK-NEXT: vand q0, q1, q0`
			`; CHECK-NEXT: bx lr`
			`entry:`
[ARM][MVE][Intrinsics] Add _x() variants of my _m() intrinsics. Summary: Better use of multiclass is used, and this helped find some existing bugs in the predicated VMULL* intrinsics, which are now fixed. The refactored VMULL[TB]Q_(INT\|POLY)_M() intrinsics were discovered to have an argument ("inactive") with incorrect type, and this required a fix that is included in this whole patch. The argument "inactive" should have been the same width (per vector element) as the return type of the intrinsic, but was not in the case where the return type was double the element width of the input types. To assist in testing the multiclassing , and to thwart further gremlins, the unit tests are improved in scope. The .ll tests are all generated by a small bit of throw-away scripting from the corresponding .c tests, and as such the diffs are large and nasty. Look at the file rather than the diff. Reviewers: dmgreen, miyuki, ostannard, simon_tatham Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71421 2019-12-11 18:53:12 +01:00			`%0 = and <8 x i16> %b, %a`
			`ret <8 x i16> %0`
[ARM][MVE][Intrinsics] Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Summary: Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Reviewers: simon_tatham, ostannard, dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70547 2019-11-15 12:30:15 +01:00			`}`

[ARM][MVE][Intrinsics] Add _x() variants of my _m() intrinsics. Summary: Better use of multiclass is used, and this helped find some existing bugs in the predicated VMULL* intrinsics, which are now fixed. The refactored VMULL[TB]Q_(INT\|POLY)_M() intrinsics were discovered to have an argument ("inactive") with incorrect type, and this required a fix that is included in this whole patch. The argument "inactive" should have been the same width (per vector element) as the return type of the intrinsic, but was not in the case where the return type was double the element width of the input types. To assist in testing the multiclassing , and to thwart further gremlins, the unit tests are improved in scope. The .ll tests are all generated by a small bit of throw-away scripting from the corresponding .c tests, and as such the diffs are large and nasty. Look at the file rather than the diff. Reviewers: dmgreen, miyuki, ostannard, simon_tatham Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71421 2019-12-11 18:53:12 +01:00			`define arm_aapcs_vfpcc <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr #0 {`
			`; CHECK-LABEL: test_vandq_u32:`
[ARM][MVE][Intrinsics] Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Summary: Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Reviewers: simon_tatham, ostannard, dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70547 2019-11-15 12:30:15 +01:00			`; CHECK: @ %bb.0: @ %entry`
			`; CHECK-NEXT: vand q0, q1, q0`
			`; CHECK-NEXT: bx lr`
			`entry:`
[ARM][MVE][Intrinsics] Add _x() variants of my _m() intrinsics. Summary: Better use of multiclass is used, and this helped find some existing bugs in the predicated VMULL* intrinsics, which are now fixed. The refactored VMULL[TB]Q_(INT\|POLY)_M() intrinsics were discovered to have an argument ("inactive") with incorrect type, and this required a fix that is included in this whole patch. The argument "inactive" should have been the same width (per vector element) as the return type of the intrinsic, but was not in the case where the return type was double the element width of the input types. To assist in testing the multiclassing , and to thwart further gremlins, the unit tests are improved in scope. The .ll tests are all generated by a small bit of throw-away scripting from the corresponding .c tests, and as such the diffs are large and nasty. Look at the file rather than the diff. Reviewers: dmgreen, miyuki, ostannard, simon_tatham Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71421 2019-12-11 18:53:12 +01:00			`%0 = and <4 x i32> %b, %a`
			`ret <4 x i32> %0`
[ARM][MVE][Intrinsics] Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Summary: Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Reviewers: simon_tatham, ostannard, dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70547 2019-11-15 12:30:15 +01:00			`}`

			`define arm_aapcs_vfpcc <4 x float> @test_vandq_f32(<4 x float> %a, <4 x float> %b) local_unnamed_addr #0 {`
			`; CHECK-LABEL: test_vandq_f32:`
			`; CHECK: @ %bb.0: @ %entry`
			`; CHECK-NEXT: vand q0, q1, q0`
			`; CHECK-NEXT: bx lr`
			`entry:`
			`%0 = bitcast <4 x float> %a to <4 x i32>`
			`%1 = bitcast <4 x float> %b to <4 x i32>`
			`%2 = and <4 x i32> %1, %0`
			`%3 = bitcast <4 x i32> %2 to <4 x float>`
			`ret <4 x float> %3`
			`}`

			`define arm_aapcs_vfpcc <16 x i8> @test_vandq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {`
			`; CHECK-LABEL: test_vandq_m_s8:`
			`; CHECK: @ %bb.0: @ %entry`
			`; CHECK-NEXT: vmsr p0, r0`
			`; CHECK-NEXT: vpst`
			`; CHECK-NEXT: vandt q0, q1, q2`
			`; CHECK-NEXT: bx lr`
			`entry:`
			`%0 = zext i16 %p to i32`
			`%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)`
			`%2 = tail call <16 x i8> @llvm.arm.mve.and.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> %inactive)`
			`ret <16 x i8> %2`
			`}`

			`declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) #2`

			`declare <16 x i8> @llvm.arm.mve.and.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, <16 x i1>, <16 x i8>) #2`

			`define arm_aapcs_vfpcc <8 x i16> @test_vandq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {`
			`; CHECK-LABEL: test_vandq_m_u16:`
			`; CHECK: @ %bb.0: @ %entry`
			`; CHECK-NEXT: vmsr p0, r0`
			`; CHECK-NEXT: vpst`
			`; CHECK-NEXT: vandt q0, q1, q2`
			`; CHECK-NEXT: bx lr`
			`entry:`
			`%0 = zext i16 %p to i32`
			`%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)`
			`%2 = tail call <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> %inactive)`
			`ret <8 x i16> %2`
			`}`

			`declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) #2`

			`declare <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #2`

[ARM][MVE][Intrinsics] Add _x() variants of my _m() intrinsics. Summary: Better use of multiclass is used, and this helped find some existing bugs in the predicated VMULL* intrinsics, which are now fixed. The refactored VMULL[TB]Q_(INT\|POLY)_M() intrinsics were discovered to have an argument ("inactive") with incorrect type, and this required a fix that is included in this whole patch. The argument "inactive" should have been the same width (per vector element) as the return type of the intrinsic, but was not in the case where the return type was double the element width of the input types. To assist in testing the multiclassing , and to thwart further gremlins, the unit tests are improved in scope. The .ll tests are all generated by a small bit of throw-away scripting from the corresponding .c tests, and as such the diffs are large and nasty. Look at the file rather than the diff. Reviewers: dmgreen, miyuki, ostannard, simon_tatham Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71421 2019-12-11 18:53:12 +01:00			`define arm_aapcs_vfpcc <4 x i32> @test_vandq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {`
			`; CHECK-LABEL: test_vandq_m_s32:`
			`; CHECK: @ %bb.0: @ %entry`
			`; CHECK-NEXT: vmsr p0, r0`
			`; CHECK-NEXT: vpst`
			`; CHECK-NEXT: vandt q0, q1, q2`
			`; CHECK-NEXT: bx lr`
			`entry:`
			`%0 = zext i16 %p to i32`
			`%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)`
			`%2 = tail call <4 x i32> @llvm.arm.mve.and.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> %inactive)`
			`ret <4 x i32> %2`
			`}`

			`declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) #2`

			`declare <4 x i32> @llvm.arm.mve.and.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, <4 x i1>, <4 x i32>) #2`

			`define arm_aapcs_vfpcc <8 x half> @test_vandq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) local_unnamed_addr #1 {`
			`; CHECK-LABEL: test_vandq_m_f16:`
[ARM][MVE][Intrinsics] Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Summary: Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Reviewers: simon_tatham, ostannard, dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70547 2019-11-15 12:30:15 +01:00			`; CHECK: @ %bb.0: @ %entry`
			`; CHECK-NEXT: vmsr p0, r0`
			`; CHECK-NEXT: vpst`
			`; CHECK-NEXT: vandt q0, q1, q2`
			`; CHECK-NEXT: bx lr`
[ARM][MVE][Intrinsics] Add _x() variants of my _m() intrinsics. Summary: Better use of multiclass is used, and this helped find some existing bugs in the predicated VMULL* intrinsics, which are now fixed. The refactored VMULL[TB]Q_(INT\|POLY)_M() intrinsics were discovered to have an argument ("inactive") with incorrect type, and this required a fix that is included in this whole patch. The argument "inactive" should have been the same width (per vector element) as the return type of the intrinsic, but was not in the case where the return type was double the element width of the input types. To assist in testing the multiclassing , and to thwart further gremlins, the unit tests are improved in scope. The .ll tests are all generated by a small bit of throw-away scripting from the corresponding .c tests, and as such the diffs are large and nasty. Look at the file rather than the diff. Reviewers: dmgreen, miyuki, ostannard, simon_tatham Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71421 2019-12-11 18:53:12 +01:00			`entry:`
			`%0 = bitcast <8 x half> %a to <8 x i16>`
			`%1 = bitcast <8 x half> %b to <8 x i16>`
			`%2 = zext i16 %p to i32`
			`%3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)`
			`%4 = bitcast <8 x half> %inactive to <8 x i16>`
			`%5 = tail call <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16> %0, <8 x i16> %1, <8 x i1> %3, <8 x i16> %4)`
			`%6 = bitcast <8 x i16> %5 to <8 x half>`
			`ret <8 x half> %6`
			`}`

			`define arm_aapcs_vfpcc <16 x i8> @test_vandq_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) local_unnamed_addr #1 {`
			`; CHECK-LABEL: test_vandq_x_u8:`
			`; CHECK: @ %bb.0: @ %entry`
			`; CHECK-NEXT: vmsr p0, r0`
			`; CHECK-NEXT: vpst`
			`; CHECK-NEXT: vandt q0, q0, q1`
			`; CHECK-NEXT: bx lr`
			`entry:`
			`%0 = zext i16 %p to i32`
			`%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)`
			`%2 = tail call <16 x i8> @llvm.arm.mve.and.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i1> %1, <16 x i8> undef)`
			`ret <16 x i8> %2`
			`}`

			`define arm_aapcs_vfpcc <8 x i16> @test_vandq_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) local_unnamed_addr #1 {`
			`; CHECK-LABEL: test_vandq_x_s16:`
			`; CHECK: @ %bb.0: @ %entry`
			`; CHECK-NEXT: vmsr p0, r0`
			`; CHECK-NEXT: vpst`
			`; CHECK-NEXT: vandt q0, q0, q1`
			`; CHECK-NEXT: bx lr`
			`entry:`
			`%0 = zext i16 %p to i32`
			`%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)`
			`%2 = tail call <8 x i16> @llvm.arm.mve.and.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i1> %1, <8 x i16> undef)`
			`ret <8 x i16> %2`
			`}`

			`define arm_aapcs_vfpcc <4 x i32> @test_vandq_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) local_unnamed_addr #1 {`
			`; CHECK-LABEL: test_vandq_x_u32:`
			`; CHECK: @ %bb.0: @ %entry`
			`; CHECK-NEXT: vmsr p0, r0`
			`; CHECK-NEXT: vpst`
			`; CHECK-NEXT: vandt q0, q0, q1`
			`; CHECK-NEXT: bx lr`
			`entry:`
			`%0 = zext i16 %p to i32`
			`%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)`
			`%2 = tail call <4 x i32> @llvm.arm.mve.and.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i1> %1, <4 x i32> undef)`
			`ret <4 x i32> %2`
			`}`

			`define arm_aapcs_vfpcc <4 x float> @test_vandq_m_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) local_unnamed_addr #1 {`
			`; CHECK-LABEL: test_vandq_m_f32:`
			`; CHECK: @ %bb.0: @ %entry`
			`; CHECK-NEXT: vmsr p0, r0`
			`; CHECK-NEXT: vpst`
			`; CHECK-NEXT: vandt q0, q0, q1`
			`; CHECK-NEXT: bx lr`
[ARM][MVE][Intrinsics] Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Summary: Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Reviewers: simon_tatham, ostannard, dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70547 2019-11-15 12:30:15 +01:00			`entry:`
			`%0 = bitcast <4 x float> %a to <4 x i32>`
			`%1 = bitcast <4 x float> %b to <4 x i32>`
			`%2 = zext i16 %p to i32`
			`%3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)`
[ARM][MVE][Intrinsics] Add _x() variants of my _m() intrinsics. Summary: Better use of multiclass is used, and this helped find some existing bugs in the predicated VMULL* intrinsics, which are now fixed. The refactored VMULL[TB]Q_(INT\|POLY)_M() intrinsics were discovered to have an argument ("inactive") with incorrect type, and this required a fix that is included in this whole patch. The argument "inactive" should have been the same width (per vector element) as the return type of the intrinsic, but was not in the case where the return type was double the element width of the input types. To assist in testing the multiclassing , and to thwart further gremlins, the unit tests are improved in scope. The .ll tests are all generated by a small bit of throw-away scripting from the corresponding .c tests, and as such the diffs are large and nasty. Look at the file rather than the diff. Reviewers: dmgreen, miyuki, ostannard, simon_tatham Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71421 2019-12-11 18:53:12 +01:00			`%4 = tail call <4 x i32> @llvm.arm.mve.and.predicated.v4i32.v4i1(<4 x i32> %0, <4 x i32> %1, <4 x i1> %3, <4 x i32> undef)`
			`%5 = bitcast <4 x i32> %4 to <4 x float>`
			`ret <4 x float> %5`
[ARM][MVE][Intrinsics] Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Summary: Add MVE VAND/VORR/VORN/VEOR/VBIC intrinsics. Add unit tests. Reviewers: simon_tatham, ostannard, dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70547 2019-11-15 12:30:15 +01:00			`}`