1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00
llvm-mirror/test/CodeGen/AArch64/sve-fixed-length-stores.ll
Paul Walker 833bdd906f [SVE] Code generation for fixed length vector loads & stores.
Summary:
This patch adds base support for code generating fixed length
vector operations targeting a known SVE vector length. To achieve
this we lower fixed length vector operations to equivalent scalable
vector operations, whereby SVE predication is used to limit the
elements processed to those present within the fixed length vector.

Specifically this patch implements load and store operations, which
get lowered to their masked counterparts thusly:

  V = load(Addr) =>
    V = extract_fixed_vector(masked_load(make_pred(V.NumElts), Addr))

  store(V, (Addr)) =>
    masked_store(insert_fixed_vector(V), make_pred(V.NumElts), Addr))

Reviewers: rengolin, efriedma

Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80385
2020-06-23 09:39:03 +00:00

105 lines
5.1 KiB
LLVM

; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -D#VBYTES=16 -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512,VBITS_LE_256
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -D#VBYTES=32 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512,VBITS_LE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -D#VBYTES=64 -check-prefixes=CHECK,VBITS_LE_1024,VBITS_LE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -D#VBYTES=128 -check-prefixes=CHECK,VBITS_LE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -D#VBYTES=256 -check-prefixes=CHECK
; VBYTES represents the useful byte size of a vector register from the code
; generator's point of view. It is clamped to power-of-2 values because
; only power-of-2 vector lengths are considered legal, regardless of the
; user specified vector length.
target triple = "aarch64-unknown-linux-gnu"
; Don't use SVE when its registers are no bigger than NEON.
; NO_SVE-NOT: ptrue
; Don't use SVE for 64-bit vectors.
define void @store_v2f32(<2 x float>* %a) #0 {
; CHECK-LABEL: store_v2f32:
; CHECK: str xzr, [x0]
; CHECK: ret
store <2 x float> zeroinitializer, <2 x float>* %a
ret void
}
; Don't use SVE for 128-bit vectors.
define void @store_v4f32(<4 x float>* %a) #0 {
; CHECK-LABEL: store_v4f32:
; CHECK: stp xzr, xzr, [x0]
; CHECK: ret
store <4 x float> zeroinitializer, <4 x float>* %a
ret void
}
define void @store_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: store_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]]
; CHECK: st1w { z{{[0-9]+}}.s }, [[PG]], [x0]
; CHECK: ret
store <8 x float> zeroinitializer, <8 x float>* %a
ret void
}
define void @store_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: store_v16f32:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]]
; CHECK-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0]
; VBITS_LE_256-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]]
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A1]]]
; CHECK: ret
store <16 x float> zeroinitializer, <16 x float>* %a
ret void
}
define void @store_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: store_v32f32:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]]
; CHECK-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0]
; VBITS_LE_512-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]]
; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A1]]]
; VBITS_LE_256-DAG: add x[[A2:[0-9]+]], x0, #[[#mul(VBYTES,2)]]
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A2]]]
; VBITS_LE_256-DAG: add x[[A3:[0-9]+]], x0, #[[#mul(VBYTES,3)]]
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A3]]]
; CHECK: ret
store <32 x float> zeroinitializer, <32 x float>* %a
ret void
}
define void @store_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: store_v64f32:
; CHECK-DAG: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]]
; CHECK-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x0]
; VBITS_LE_1024-DAG: add x[[A1:[0-9]+]], x0, #[[#VBYTES]]
; VBITS_LE_1024-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A1]]]
; VBITS_LE_512-DAG: add x[[A2:[0-9]+]], x0, #[[#mul(VBYTES,2)]]
; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A2]]]
; VBITS_LE_512-DAG: add x[[A3:[0-9]+]], x0, #[[#mul(VBYTES,3)]]
; VBITS_LE_512-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A3]]]
; VBITS_LE_256-DAG: add x[[A4:[0-9]+]], x0, #[[#mul(VBYTES,4)]]
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A4]]]
; VBITS_LE_256-DAG: add x[[A5:[0-9]+]], x0, #[[#mul(VBYTES,5)]]
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A5]]]
; VBITS_LE_256-DAG: add x[[A6:[0-9]+]], x0, #[[#mul(VBYTES,6)]]
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A6]]]
; VBITS_LE_256-DAG: add x[[A7:[0-9]+]], x0, #[[#mul(VBYTES,7)]]
; VBITS_LE_256-DAG: st1w { z{{[0-9]+}}.s }, [[PG]], [x[[A7]]]
; CHECK: ret
store <64 x float> zeroinitializer, <64 x float>* %a
ret void
}
attributes #0 = { "target-features"="+sve" }