llvm-mirror/test/CodeGen/AArch64/split-vector-insert.ll
Commit 40e085b6bd by David Sherwood: [CodeGen] Improve code generation for clamping of constant indices with scalable vectors
When trying to clamp a constant index into a scalable vector, we can
test whether the index is less than the minimum number of elements in
the vector. If so, we can simply return the index, because it is
guaranteed to fit inside the vector.

Differential Revision: https://reviews.llvm.org/D100639
2021-04-19 08:34:17 +01:00
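
The improvement described above boils down to a simple check during index
clamping. The sketch below is illustrative only: the function and parameter
names clampIndex, MinElts and RuntimeElts are made up for this example and are
not the actual LLVM internals. A scalable vector declared with MinElts minimum
elements holds MinElts * vscale elements at run time, so a constant index below
MinElts is always in range, while larger indices still need the clamp that the
cmp/csel sequences in the test below check for.

#include <algorithm>
#include <cstdint>

// Illustrative sketch of the clamping decision, not the actual LLVM code.
// MinElts is the minimum element count of the scalable vector type (2 for
// nxv2i64); RuntimeElts is the element count at run time (MinElts * vscale).
uint64_t clampIndex(uint64_t Idx, uint64_t MinElts, uint64_t RuntimeElts) {
  // A constant index below the minimum element count is guaranteed to fit,
  // so it can be returned unchanged (the improvement added by this commit).
  if (Idx < MinElts)
    return Idx;
  // Otherwise clamp to the last valid element; this corresponds to the
  // cntd/sub/cmp/csel sequences in the assembly checks below.
  return std::min(Idx, RuntimeElts - 1);
}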

; RUN: llc < %s -debug-only=legalize-types 2>&1 | FileCheck %s --check-prefix=CHECK-LEGALIZATION
; RUN: llc < %s | FileCheck %s
; REQUIRES: asserts

target triple = "aarch64-unknown-linux-gnu"
attributes #0 = {"target-features"="+sve"}

declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64>, <8 x i64>, i64)
declare <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double>, <8 x double>, i64)
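
; Each <8 x i64> or <8 x double> operand below is legalized into four 128-bit
; subvector inserts at constant indices 0, 2, 4 and 6. Index 0 is known to be
; in range for an nxv2 vector, so it needs no clamping; indices 2, 4 and 6 are
; clamped to the runtime element count via the cmp/csel sequences checked
; below.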
define <vscale x 2 x i64> @test_nxv2i64_v8i64(<vscale x 2 x i64> %a, <8 x i64> %b) #0 {
; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2i64 = insert_subvector {{t[0-9]+}}, {{t[0-9]+}}, Constant:i64<0>
; CHECK-LEGALIZATION: Legally typed node: [[T2:t[0-9]+]]: nxv2i64 = insert_subvector [[T1]], {{t[0-9]+}}, Constant:i64<2>
; CHECK-LEGALIZATION: Legally typed node: [[T3:t[0-9]+]]: nxv2i64 = insert_subvector [[T2]], {{t[0-9]+}}, Constant:i64<4>
; CHECK-LEGALIZATION: Legally typed node: [[T4:t[0-9]+]]: nxv2i64 = insert_subvector [[T3]], {{t[0-9]+}}, Constant:i64<6>
; CHECK-LABEL: test_nxv2i64_v8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cntd x9
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: str q1, [sp]
; CHECK-NEXT: sub x9, x9, #1 // =1
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: mov w8, #2
; CHECK-NEXT: cmp x9, #2 // =2
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: addvl x10, sp, #1
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: str q2, [x10, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: mov w8, #4
; CHECK-NEXT: cmp x9, #4 // =4
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: addvl x10, sp, #2
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: str q3, [x10, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl]
; CHECK-NEXT: mov w8, #6
; CHECK-NEXT: cmp x9, #6 // =6
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: addvl x10, sp, #3
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT: str q4, [x10, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #3, mul vl]
; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%r = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> %a, <8 x i64> %b, i64 0)
ret <vscale x 2 x i64> %r
}
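
; The floating-point variant below exercises the same legalization path and
; its checks mirror the integer case above.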
define <vscale x 2 x double> @test_nxv2f64_v8f64(<vscale x 2 x double> %a, <8 x double> %b) #0 {
; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2f64 = insert_subvector {{t[0-9]+}}, {{t[0-9]+}}, Constant:i64<0>
; CHECK-LEGALIZATION: Legally typed node: [[T2:t[0-9]+]]: nxv2f64 = insert_subvector [[T1]], {{t[0-9]+}}, Constant:i64<2>
; CHECK-LEGALIZATION: Legally typed node: [[T3:t[0-9]+]]: nxv2f64 = insert_subvector [[T2]], {{t[0-9]+}}, Constant:i64<4>
; CHECK-LEGALIZATION: Legally typed node: [[T4:t[0-9]+]]: nxv2f64 = insert_subvector [[T3]], {{t[0-9]+}}, Constant:i64<6>
; CHECK-LABEL: test_nxv2f64_v8f64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cntd x9
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: str q1, [sp]
; CHECK-NEXT: sub x9, x9, #1 // =1
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: mov w8, #2
; CHECK-NEXT: cmp x9, #2 // =2
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: addvl x10, sp, #1
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: str q2, [x10, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: mov w8, #4
; CHECK-NEXT: cmp x9, #4 // =4
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: addvl x10, sp, #2
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: str q3, [x10, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl]
; CHECK-NEXT: mov w8, #6
; CHECK-NEXT: cmp x9, #6 // =6
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: addvl x10, sp, #3
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT: str q4, [x10, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #3, mul vl]
; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%r = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> %a, <8 x double> %b, i64 0)
ret <vscale x 2 x double> %r
}