1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00
llvm-mirror/test/CodeGen/AArch64/neon-stepvector.ll
David Sherwood 42a72164a2 [IR][SVE] Add new llvm.experimental.stepvector intrinsic
This patch adds a new llvm.experimental.stepvector intrinsic,
which takes no arguments and returns a linear integer sequence of
values of the form <0, 1, ...>. It is primarily intended for
scalable vectors, although it will work for fixed width vectors
too. It is intended that later patches will make use of this
new intrinsic when vectorising induction variables, which is currently
only supported for fixed-width vectors. I've added a new CreateStepVector
method to the IRBuilder, which will generate a call to this
intrinsic for scalable vectors and fall back on creating a
ConstantVector for fixed width.

For scalable vectors this intrinsic is lowered to a new ISD node
called STEP_VECTOR, which takes a single constant integer argument
as the step. During lowering this argument is set to a value of 1.
This additional argument exists at the codegen level because
future patches will introduce various generic DAG
combines such as

  mul step_vector(1), 2 -> step_vector(2)
  add step_vector(1), step_vector(1) -> step_vector(2)
  shl step_vector(1), 1 -> step_vector(2)
  etc.

that encourage a canonical format for all targets. This hopefully
means all other targets supporting scalable vectors can benefit
from this too.

I've added cost model tests for both fixed width and scalable
vectors:

  llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll
  llvm/test/Analysis/CostModel/AArch64/sve-stepvector.ll

as well as codegen lowering tests for fixed width and scalable
vectors:

  llvm/test/CodeGen/AArch64/neon-stepvector.ll
  llvm/test/CodeGen/AArch64/sve-stepvector.ll

See this thread for discussion of the intrinsic:
https://lists.llvm.org/pipermail/llvm-dev/2021-January/147943.html
2021-03-23 10:43:35 +00:00

182 lines
5.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK
; LEGAL INTEGER TYPES
; Returns the <2 x i64> value produced by llvm.experimental.stepvector.v2i64.
; Expected output: the sequence 0, 1 is emitted as .xword entries under the
; constant-pool label .LCPI0_0 and fetched into q0 with an adrp + ldr pair.
define <2 x i64> @stepvector_v2i64() {
; CHECK-LABEL: .LCPI0_0:
; CHECK-NEXT: .xword 0
; CHECK-NEXT: .xword 1
; CHECK-LABEL: stepvector_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: ret
entry:
; The intrinsic takes no arguments; its result is returned unchanged.
%0 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
ret <2 x i64> %0
}
; Returns the <4 x i32> value produced by llvm.experimental.stepvector.v4i32.
; Expected output: the sequence 0..3 is emitted as .word entries under the
; constant-pool label .LCPI1_0 and fetched into q0 with an adrp + ldr pair.
define <4 x i32> @stepvector_v4i32() {
; CHECK-LABEL: .LCPI1_0:
; CHECK-NEXT: .word 0
; CHECK-NEXT: .word 1
; CHECK-NEXT: .word 2
; CHECK-NEXT: .word 3
; CHECK-LABEL: stepvector_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: ret
entry:
; The intrinsic takes no arguments; its result is returned unchanged.
%0 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
ret <4 x i32> %0
}
; Returns the <8 x i16> value produced by llvm.experimental.stepvector.v8i16.
; Expected output: the sequence 0..7 is emitted as .hword entries under the
; constant-pool label .LCPI2_0 and fetched into q0 with an adrp + ldr pair.
define <8 x i16> @stepvector_v8i16() {
; CHECK-LABEL: .LCPI2_0:
; CHECK-NEXT: .hword 0
; CHECK-NEXT: .hword 1
; CHECK-NEXT: .hword 2
; CHECK-NEXT: .hword 3
; CHECK-NEXT: .hword 4
; CHECK-NEXT: .hword 5
; CHECK-NEXT: .hword 6
; CHECK-NEXT: .hword 7
; CHECK-LABEL: stepvector_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: ret
entry:
; The intrinsic takes no arguments; its result is returned unchanged.
%0 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
ret <8 x i16> %0
}
; Returns the <16 x i8> value produced by llvm.experimental.stepvector.v16i8.
; Expected output: the sequence 0..15 is emitted as .byte entries under the
; constant-pool label .LCPI3_0 and fetched into q0 with an adrp + ldr pair.
define <16 x i8> @stepvector_v16i8() {
; CHECK-LABEL: .LCPI3_0:
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .byte 12
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .byte 15
; CHECK-LABEL: stepvector_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: ret
entry:
; The intrinsic takes no arguments; its result is returned unchanged.
%0 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
ret <16 x i8> %0
}
; ILLEGAL INTEGER TYPES
; Returns the <4 x i64> value produced by llvm.experimental.stepvector.v4i64.
; Expected output: the sequence is split across two constant-pool entries,
; .LCPI4_0 holding 0, 1 and .LCPI4_1 holding 2, 3, which are loaded into
; q0 and q1 respectively.
define <4 x i64> @stepvector_v4i64() {
; CHECK-LABEL: .LCPI4_0:
; CHECK-NEXT: .xword 0
; CHECK-NEXT: .xword 1
; CHECK-LABEL: .LCPI4_1:
; CHECK-NEXT: .xword 2
; CHECK-NEXT: .xword 3
; CHECK-LABEL: stepvector_v4i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: adrp x9, .LCPI4_1
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI4_1]
; CHECK-NEXT: ret
entry:
; The intrinsic takes no arguments; its result is returned unchanged.
%0 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
ret <4 x i64> %0
}
; Returns the <16 x i32> value produced by llvm.experimental.stepvector.v16i32.
; Expected output: the sequence 0..15 is split across four constant-pool
; entries of four .word values each (.LCPI5_0 through .LCPI5_3), which are
; loaded into q0, q1, q2 and q3 in ascending element order.
define <16 x i32> @stepvector_v16i32() {
; CHECK-LABEL: .LCPI5_0:
; CHECK-NEXT: .word 0
; CHECK-NEXT: .word 1
; CHECK-NEXT: .word 2
; CHECK-NEXT: .word 3
; CHECK-LABEL: .LCPI5_1:
; CHECK-NEXT: .word 4
; CHECK-NEXT: .word 5
; CHECK-NEXT: .word 6
; CHECK-NEXT: .word 7
; CHECK-LABEL: .LCPI5_2:
; CHECK-NEXT: .word 8
; CHECK-NEXT: .word 9
; CHECK-NEXT: .word 10
; CHECK-NEXT: .word 11
; CHECK-LABEL: .LCPI5_3:
; CHECK-NEXT: .word 12
; CHECK-NEXT: .word 13
; CHECK-NEXT: .word 14
; CHECK-NEXT: .word 15
; CHECK-LABEL: stepvector_v16i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI5_0
; CHECK-NEXT: adrp x9, .LCPI5_1
; CHECK-NEXT: adrp x10, .LCPI5_2
; CHECK-NEXT: adrp x11, .LCPI5_3
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI5_1]
; CHECK-NEXT: ldr q2, [x10, :lo12:.LCPI5_2]
; CHECK-NEXT: ldr q3, [x11, :lo12:.LCPI5_3]
; CHECK-NEXT: ret
entry:
; The intrinsic takes no arguments; its result is returned unchanged.
%0 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
ret <16 x i32> %0
}
; Returns the <2 x i32> value produced by llvm.experimental.stepvector.v2i32.
; Expected output: the sequence 0, 1 is emitted as .word entries under the
; constant-pool label .LCPI6_0 and fetched with a single adrp + ldr d0 pair.
; NOTE(review): this function follows the "ILLEGAL INTEGER TYPES" heading, yet
; the expected output is one d-register load with no splitting -- confirm
; whether that grouping is intended.
define <2 x i32> @stepvector_v2i32() {
; CHECK-LABEL: .LCPI6_0:
; CHECK-NEXT: .word 0
; CHECK-NEXT: .word 1
; CHECK-LABEL: stepvector_v2i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI6_0
; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI6_0]
; CHECK-NEXT: ret
entry:
; The intrinsic takes no arguments; its result is returned unchanged.
%0 = call <2 x i32> @llvm.experimental.stepvector.v2i32()
ret <2 x i32> %0
}
; Returns the <4 x i16> value produced by llvm.experimental.stepvector.v4i16.
; Expected output: the sequence 0..3 is emitted as .hword entries under the
; constant-pool label .LCPI7_0 and fetched with a single adrp + ldr d0 pair.
; NOTE(review): this function follows the "ILLEGAL INTEGER TYPES" heading, yet
; the expected output is one d-register load with no splitting -- confirm
; whether that grouping is intended.
define <4 x i16> @stepvector_v4i16() {
; CHECK-LABEL: .LCPI7_0:
; CHECK-NEXT: .hword 0
; CHECK-NEXT: .hword 1
; CHECK-NEXT: .hword 2
; CHECK-NEXT: .hword 3
; CHECK-LABEL: stepvector_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI7_0
; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI7_0]
; CHECK-NEXT: ret
entry:
; The intrinsic takes no arguments; its result is returned unchanged.
%0 = call <4 x i16> @llvm.experimental.stepvector.v4i16()
ret <4 x i16> %0
}
; Declarations of the llvm.experimental.stepvector overloads called by the
; test functions in this file, one per tested fixed-width vector type. Each
; takes no arguments and returns a vector of the type named in its suffix.
declare <2 x i64> @llvm.experimental.stepvector.v2i64()
declare <4 x i32> @llvm.experimental.stepvector.v4i32()
declare <8 x i16> @llvm.experimental.stepvector.v8i16()
declare <16 x i8> @llvm.experimental.stepvector.v16i8()
declare <4 x i64> @llvm.experimental.stepvector.v4i64()
declare <16 x i32> @llvm.experimental.stepvector.v16i32()
declare <2 x i32> @llvm.experimental.stepvector.v2i32()
declare <4 x i16> @llvm.experimental.stepvector.v4i16()