mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
42a72164a2
This patch adds a new llvm.experimental.stepvector intrinsic, which takes no arguments and returns a linear integer sequence of values of the form <0, 1, ...>. It is primarily intended for scalable vectors, although it will work for fixed-width vectors too. It is intended that later patches will make use of this new intrinsic when vectorising induction variables, which is currently only supported for fixed-width vectors. I've added a new CreateStepVector method to the IRBuilder, which will generate a call to this intrinsic for scalable vectors and fall back on creating a ConstantVector for fixed-width vectors. For scalable vectors this intrinsic is lowered to a new ISD node called STEP_VECTOR, which takes a single constant integer argument as the step. During lowering this argument is set to a value of 1. The reason for this additional argument at the codegen level is that future patches will introduce various generic DAG combines, such as: "mul step_vector(1), 2 -> step_vector(2)"; "add step_vector(1), step_vector(1) -> step_vector(2)"; "shl step_vector(1), 1 -> step_vector(2)"; etc., that encourage a canonical format for all targets. This hopefully means all other targets supporting scalable vectors can benefit from this too. I've added cost model tests for both fixed-width and scalable vectors: llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll and llvm/test/Analysis/CostModel/AArch64/sve-stepvector.ll; as well as codegen lowering tests for fixed-width and scalable vectors: llvm/test/CodeGen/AArch64/neon-stepvector.ll and llvm/test/CodeGen/AArch64/sve-stepvector.ll. See this thread for discussion of the intrinsic: https://lists.llvm.org/pipermail/llvm-dev/2021-January/147943.html
182 lines
5.0 KiB
LLVM
182 lines
5.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK

; LEGAL INTEGER TYPES

; <2 x i64> step vector, returned straight from the intrinsic call.
; Expected output: the lanes <0, 1> are emitted as data under .LCPI0_0 and
; fetched with an adrp plus a single `ldr q0`.
define <2 x i64> @stepvector_v2i64() {
; CHECK-LABEL: .LCPI0_0:
; CHECK-NEXT: .xword 0
; CHECK-NEXT: .xword 1
; CHECK-LABEL: stepvector_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: ret
entry:
  %0 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
  ret <2 x i64> %0
}

; <4 x i32> step vector, returned straight from the intrinsic call.
; Expected output: the lanes <0, 1, 2, 3> are emitted as data under .LCPI1_0
; and fetched with an adrp plus a single `ldr q0`.
define <4 x i32> @stepvector_v4i32() {
; CHECK-LABEL: .LCPI1_0:
; CHECK-NEXT: .word 0
; CHECK-NEXT: .word 1
; CHECK-NEXT: .word 2
; CHECK-NEXT: .word 3
; CHECK-LABEL: stepvector_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: ret
entry:
  %0 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
  ret <4 x i32> %0
}

; <8 x i16> step vector, returned straight from the intrinsic call.
; Expected output: the lanes <0 .. 7> are emitted as halfwords under .LCPI2_0
; and fetched with an adrp plus a single `ldr q0`.
define <8 x i16> @stepvector_v8i16() {
; CHECK-LABEL: .LCPI2_0:
; CHECK-NEXT: .hword 0
; CHECK-NEXT: .hword 1
; CHECK-NEXT: .hword 2
; CHECK-NEXT: .hword 3
; CHECK-NEXT: .hword 4
; CHECK-NEXT: .hword 5
; CHECK-NEXT: .hword 6
; CHECK-NEXT: .hword 7
; CHECK-LABEL: stepvector_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: ret
entry:
  %0 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
  ret <8 x i16> %0
}

; <16 x i8> step vector, returned straight from the intrinsic call.
; Expected output: the lanes <0 .. 15> are emitted as bytes under .LCPI3_0
; and fetched with an adrp plus a single `ldr q0`.
define <16 x i8> @stepvector_v16i8() {
; CHECK-LABEL: .LCPI3_0:
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .byte 12
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .byte 15
; CHECK-LABEL: stepvector_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: ret
entry:
  %0 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
  ret <16 x i8> %0
}

; ILLEGAL INTEGER TYPES

; <4 x i64> step vector, returned straight from the intrinsic call.
; Expected output: the sequence is split across two data labels, .LCPI4_0
; holding <0, 1> and .LCPI4_1 holding <2, 3>, loaded into q0 and q1.
define <4 x i64> @stepvector_v4i64() {
; CHECK-LABEL: .LCPI4_0:
; CHECK-NEXT: .xword 0
; CHECK-NEXT: .xword 1
; CHECK-LABEL: .LCPI4_1:
; CHECK-NEXT: .xword 2
; CHECK-NEXT: .xword 3
; CHECK-LABEL: stepvector_v4i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: adrp x9, .LCPI4_1
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI4_1]
; CHECK-NEXT: ret
entry:
  %0 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
  ret <4 x i64> %0
}

; <16 x i32> step vector, returned straight from the intrinsic call.
; Expected output: the sequence is split across four data labels
; (.LCPI5_0 .. .LCPI5_3, four words each, covering 0 .. 15), loaded into
; q0, q1, q2 and q3 respectively.
define <16 x i32> @stepvector_v16i32() {
; CHECK-LABEL: .LCPI5_0:
; CHECK-NEXT: .word 0
; CHECK-NEXT: .word 1
; CHECK-NEXT: .word 2
; CHECK-NEXT: .word 3
; CHECK-LABEL: .LCPI5_1:
; CHECK-NEXT: .word 4
; CHECK-NEXT: .word 5
; CHECK-NEXT: .word 6
; CHECK-NEXT: .word 7
; CHECK-LABEL: .LCPI5_2:
; CHECK-NEXT: .word 8
; CHECK-NEXT: .word 9
; CHECK-NEXT: .word 10
; CHECK-NEXT: .word 11
; CHECK-LABEL: .LCPI5_3:
; CHECK-NEXT: .word 12
; CHECK-NEXT: .word 13
; CHECK-NEXT: .word 14
; CHECK-NEXT: .word 15
; CHECK-LABEL: stepvector_v16i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI5_0
; CHECK-NEXT: adrp x9, .LCPI5_1
; CHECK-NEXT: adrp x10, .LCPI5_2
; CHECK-NEXT: adrp x11, .LCPI5_3
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI5_1]
; CHECK-NEXT: ldr q2, [x10, :lo12:.LCPI5_2]
; CHECK-NEXT: ldr q3, [x11, :lo12:.LCPI5_3]
; CHECK-NEXT: ret
entry:
  %0 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
  ret <16 x i32> %0
}

; <2 x i32> step vector, returned straight from the intrinsic call.
; Expected output: the lanes <0, 1> are emitted as words under .LCPI6_0 and
; fetched with an adrp plus a single `ldr d0`.
; NOTE(review): this case is grouped under ILLEGAL INTEGER TYPES, yet it needs
; only one load, like the cases in the legal group - confirm the grouping.
define <2 x i32> @stepvector_v2i32() {
; CHECK-LABEL: .LCPI6_0:
; CHECK-NEXT: .word 0
; CHECK-NEXT: .word 1
; CHECK-LABEL: stepvector_v2i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI6_0
; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI6_0]
; CHECK-NEXT: ret
entry:
  %0 = call <2 x i32> @llvm.experimental.stepvector.v2i32()
  ret <2 x i32> %0
}

; <4 x i16> step vector, returned straight from the intrinsic call.
; Expected output: the lanes <0, 1, 2, 3> are emitted as halfwords under
; .LCPI7_0 and fetched with an adrp plus a single `ldr d0`.
; NOTE(review): this case is grouped under ILLEGAL INTEGER TYPES, yet it needs
; only one load, like the cases in the legal group - confirm the grouping.
define <4 x i16> @stepvector_v4i16() {
; CHECK-LABEL: .LCPI7_0:
; CHECK-NEXT: .hword 0
; CHECK-NEXT: .hword 1
; CHECK-NEXT: .hword 2
; CHECK-NEXT: .hword 3
; CHECK-LABEL: stepvector_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI7_0
; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI7_0]
; CHECK-NEXT: ret
entry:
  %0 = call <4 x i16> @llvm.experimental.stepvector.v4i16()
  ret <4 x i16> %0
}

; Declarations of the llvm.experimental.stepvector overloads called by the
; test functions in this file, one per result vector type. The two groups
; follow the order of the test functions.
declare <2 x i64> @llvm.experimental.stepvector.v2i64()
declare <4 x i32> @llvm.experimental.stepvector.v4i32()
declare <8 x i16> @llvm.experimental.stepvector.v8i16()
declare <16 x i8> @llvm.experimental.stepvector.v16i8()

declare <4 x i64> @llvm.experimental.stepvector.v4i64()
declare <16 x i32> @llvm.experimental.stepvector.v16i32()
declare <2 x i32> @llvm.experimental.stepvector.v2i32()
declare <4 x i16> @llvm.experimental.stepvector.v4i16()