mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
f69dc8533f
This avoids the use of the vector unit for copying from scalar to vector. There is an extra ptrue instruction, but a predicate register with the ptrue pattern populated is likely to be free in the context of real code. Tests were generated from a template to cover the axes mentioned at the top of the test file. Co-authored-by: Francesco Petrogalli <francesco.petrogalli@arm.com> Differential Revision: https://reviews.llvm.org/D103170
725 lines
28 KiB
LLVM
725 lines
28 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
|
|
;
|
|
; Check that an ld1r* instruction is generated to splat a scalar during the load,
|
|
; rather than mov from scalar to vector register (which would require the vector unit).
|
|
;
|
|
; one-off: ld1r_stack checks that ld1rb works with stack objects.
|
|
;
|
|
; Test axes:
|
|
; types = [i8, i16, i32, i64, half, float, double]
|
|
; methods = [direct load, gep upper bound - 1, gep out of range x {neg,pos}, sext..., zext..., unpacked_floats...]
|
|
;
|
|
|
|
; Externally defined byte; @ld1r_stack reads it with a volatile load.
@g8 = external global i8
|
|
|
|
; One-off test for splatted value coming from stack load.
|
|
; Splat of a byte reloaded from the stack: a volatile store fills the alloca and
; the splatted value is read two bytes past it. The expected output addresses
; that byte directly off sp in the ld1rb ([sp, #14]).
define <vscale x 16 x i8> @ld1r_stack() {
; CHECK-LABEL: ld1r_stack:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #16 // =16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: adrp x8, :got:g8
; CHECK-NEXT: ldr x8, [x8, :got_lo12:g8]
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ldrb w8, [x8]
; CHECK-NEXT: strb w8, [sp, #12]
; CHECK-NEXT: ld1rb { z0.b }, p0/z, [sp, #14]
; CHECK-NEXT: add sp, sp, #16 // =16
; CHECK-NEXT: ret
  %slot = alloca i8
  %byte = load volatile i8, i8* @g8
  store volatile i8 %byte, i8* %slot
  %addr = getelementptr i8, i8* %slot, i32 2
  %elt = load i8, i8* %addr
  %lane0 = insertelement <vscale x 16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <vscale x 16 x i8> %lane0, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %splat
}
|
|
|
|
; Direct i8 load splatted across <vscale x 16 x i8>: expected as one ld1rb from [x0].
define <vscale x 16 x i8> @ld1rb(i8* %valp) {
; CHECK-LABEL: ld1rb:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i8, i8* %valp
  %lane0 = insertelement <vscale x 16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <vscale x 16 x i8> %lane0, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %splat
}
|
|
|
|
; i8 element index 63: the offset is expected to fold into the ld1rb immediate (#63).
define <vscale x 16 x i8> @ld1rb_gep(i8* %valp) {
; CHECK-LABEL: ld1rb_gep:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0, #63]
; CHECK-NEXT: ret
  %addr = getelementptr i8, i8* %valp, i32 63
  %elt = load i8, i8* %addr
  %lane0 = insertelement <vscale x 16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <vscale x 16 x i8> %lane0, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %splat
}
|
|
|
|
; i8 element index 64 is not folded: the expected output forms the address with
; an add (#64) and the ld1rb then uses a plain base register.
define <vscale x 16 x i8> @ld1rb_gep_out_of_range_up(i8* %valp) {
; CHECK-LABEL: ld1rb_gep_out_of_range_up:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #64 // =64
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr i8, i8* %valp, i32 64
  %elt = load i8, i8* %addr
  %lane0 = insertelement <vscale x 16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <vscale x 16 x i8> %lane0, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %splat
}
|
|
|
|
; i8 element index -1 is not folded: the expected output forms the address with
; a sub (#1) and the ld1rb then uses a plain base register.
define <vscale x 16 x i8> @ld1rb_gep_out_of_range_down(i8* %valp) {
; CHECK-LABEL: ld1rb_gep_out_of_range_down:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, #1 // =1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr i8, i8* %valp, i32 -1
  %elt = load i8, i8* %addr
  %lane0 = insertelement <vscale x 16 x i8> undef, i8 %elt, i32 0
  %splat = shufflevector <vscale x 16 x i8> %lane0, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %splat
}
|
|
|
|
; i8 zero-extended to i16 before the splat: expected as ld1rb into .h lanes.
define <vscale x 8 x i16> @ld1rb_i8_i16_zext(i8* %valp) {
; CHECK-LABEL: ld1rb_i8_i16_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1rb { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i8, i8* %valp
  %wide = zext i8 %elt to i16
  %lane0 = insertelement <vscale x 8 x i16> undef, i16 %wide, i32 0
  %splat = shufflevector <vscale x 8 x i16> %lane0, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %splat
}
|
|
|
|
; i8 sign-extended to i16 before the splat: expected as ld1rsb into .h lanes.
define <vscale x 8 x i16> @ld1rb_i8_i16_sext(i8* %valp) {
; CHECK-LABEL: ld1rb_i8_i16_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1rsb { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i8, i8* %valp
  %wide = sext i8 %elt to i16
  %lane0 = insertelement <vscale x 8 x i16> undef, i16 %wide, i32 0
  %splat = shufflevector <vscale x 8 x i16> %lane0, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %splat
}
|
|
|
|
; i8 zero-extended to i32 before the splat: expected as ld1rb into .s lanes.
define <vscale x 4 x i32> @ld1rb_i8_i32_zext(i8* %valp) {
; CHECK-LABEL: ld1rb_i8_i32_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rb { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i8, i8* %valp
  %wide = zext i8 %elt to i32
  %lane0 = insertelement <vscale x 4 x i32> undef, i32 %wide, i32 0
  %splat = shufflevector <vscale x 4 x i32> %lane0, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %splat
}
|
|
|
|
; i8 sign-extended to i32 before the splat: expected as ld1rsb into .s lanes.
define <vscale x 4 x i32> @ld1rb_i8_i32_sext(i8* %valp) {
; CHECK-LABEL: ld1rb_i8_i32_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rsb { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i8, i8* %valp
  %wide = sext i8 %elt to i32
  %lane0 = insertelement <vscale x 4 x i32> undef, i32 %wide, i32 0
  %splat = shufflevector <vscale x 4 x i32> %lane0, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %splat
}
|
|
|
|
; i8 zero-extended to i64 before the splat: expected as ld1rb into .d lanes.
define <vscale x 2 x i64> @ld1rb_i8_i64_zext(i8* %valp) {
; CHECK-LABEL: ld1rb_i8_i64_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rb { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i8, i8* %valp
  %wide = zext i8 %elt to i64
  %lane0 = insertelement <vscale x 2 x i64> undef, i64 %wide, i32 0
  %splat = shufflevector <vscale x 2 x i64> %lane0, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %splat
}
|
|
|
|
; i8 sign-extended to i64 before the splat: expected as ld1rsb into .d lanes.
define <vscale x 2 x i64> @ld1rb_i8_i64_sext(i8* %valp) {
; CHECK-LABEL: ld1rb_i8_i64_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rsb { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i8, i8* %valp
  %wide = sext i8 %elt to i64
  %lane0 = insertelement <vscale x 2 x i64> undef, i64 %wide, i32 0
  %splat = shufflevector <vscale x 2 x i64> %lane0, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %splat
}
|
|
|
|
; Direct i16 load splatted across <vscale x 8 x i16>: expected as one ld1rh from [x0].
define <vscale x 8 x i16> @ld1rh(i16* %valp) {
; CHECK-LABEL: ld1rh:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i16, i16* %valp
  %lane0 = insertelement <vscale x 8 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <vscale x 8 x i16> %lane0, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %splat
}
|
|
|
|
; i16 element index 63: the offset is expected to fold into the ld1rh immediate (#126).
define <vscale x 8 x i16> @ld1rh_gep(i16* %valp) {
; CHECK-LABEL: ld1rh_gep:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0, #126]
; CHECK-NEXT: ret
  %addr = getelementptr i16, i16* %valp, i32 63
  %elt = load i16, i16* %addr
  %lane0 = insertelement <vscale x 8 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <vscale x 8 x i16> %lane0, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %splat
}
|
|
|
|
; i16 element index 64 is not folded: the expected output forms the address with
; an add (#128) and the ld1rh then uses a plain base register.
define <vscale x 8 x i16> @ld1rh_gep_out_of_range_up(i16* %valp) {
; CHECK-LABEL: ld1rh_gep_out_of_range_up:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #128 // =128
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr i16, i16* %valp, i32 64
  %elt = load i16, i16* %addr
  %lane0 = insertelement <vscale x 8 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <vscale x 8 x i16> %lane0, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %splat
}
|
|
|
|
; i16 element index -1 is not folded: the expected output forms the address with
; a sub (#2) and the ld1rh then uses a plain base register.
define <vscale x 8 x i16> @ld1rh_gep_out_of_range_down(i16* %valp) {
; CHECK-LABEL: ld1rh_gep_out_of_range_down:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, #2 // =2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr i16, i16* %valp, i32 -1
  %elt = load i16, i16* %addr
  %lane0 = insertelement <vscale x 8 x i16> undef, i16 %elt, i32 0
  %splat = shufflevector <vscale x 8 x i16> %lane0, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %splat
}
|
|
|
|
; i16 zero-extended to i32 before the splat: expected as ld1rh into .s lanes.
define <vscale x 4 x i32> @ld1rh_i16_i32_zext(i16* %valp) {
; CHECK-LABEL: ld1rh_i16_i32_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i16, i16* %valp
  %wide = zext i16 %elt to i32
  %lane0 = insertelement <vscale x 4 x i32> undef, i32 %wide, i32 0
  %splat = shufflevector <vscale x 4 x i32> %lane0, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %splat
}
|
|
|
|
; i16 sign-extended to i32 before the splat: expected as ld1rsh into .s lanes.
define <vscale x 4 x i32> @ld1rh_i16_i32_sext(i16* %valp) {
; CHECK-LABEL: ld1rh_i16_i32_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rsh { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i16, i16* %valp
  %wide = sext i16 %elt to i32
  %lane0 = insertelement <vscale x 4 x i32> undef, i32 %wide, i32 0
  %splat = shufflevector <vscale x 4 x i32> %lane0, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %splat
}
|
|
|
|
; i16 zero-extended to i64 before the splat: expected as ld1rh into .d lanes.
define <vscale x 2 x i64> @ld1rh_i16_i64_zext(i16* %valp) {
; CHECK-LABEL: ld1rh_i16_i64_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i16, i16* %valp
  %wide = zext i16 %elt to i64
  %lane0 = insertelement <vscale x 2 x i64> undef, i64 %wide, i32 0
  %splat = shufflevector <vscale x 2 x i64> %lane0, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %splat
}
|
|
|
|
; i16 sign-extended to i64 before the splat: expected as ld1rsh into .d lanes.
define <vscale x 2 x i64> @ld1rh_i16_i64_sext(i16* %valp) {
; CHECK-LABEL: ld1rh_i16_i64_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rsh { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i16, i16* %valp
  %wide = sext i16 %elt to i64
  %lane0 = insertelement <vscale x 2 x i64> undef, i64 %wide, i32 0
  %splat = shufflevector <vscale x 2 x i64> %lane0, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %splat
}
|
|
|
|
; Direct i32 load splatted across <vscale x 4 x i32>: expected as one ld1rw from [x0].
define <vscale x 4 x i32> @ld1rw(i32* %valp) {
; CHECK-LABEL: ld1rw:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i32, i32* %valp
  %lane0 = insertelement <vscale x 4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <vscale x 4 x i32> %lane0, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %splat
}
|
|
|
|
; i32 element index 63: the offset is expected to fold into the ld1rw immediate (#252).
define <vscale x 4 x i32> @ld1rw_gep(i32* %valp) {
; CHECK-LABEL: ld1rw_gep:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0, #252]
; CHECK-NEXT: ret
  %addr = getelementptr i32, i32* %valp, i32 63
  %elt = load i32, i32* %addr
  %lane0 = insertelement <vscale x 4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <vscale x 4 x i32> %lane0, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %splat
}
|
|
|
|
; i32 element index 64 is not folded: the expected output forms the address with
; an add (#256) and the ld1rw then uses a plain base register.
define <vscale x 4 x i32> @ld1rw_gep_out_of_range_up(i32* %valp) {
; CHECK-LABEL: ld1rw_gep_out_of_range_up:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #256 // =256
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr i32, i32* %valp, i32 64
  %elt = load i32, i32* %addr
  %lane0 = insertelement <vscale x 4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <vscale x 4 x i32> %lane0, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %splat
}
|
|
|
|
; i32 element index -1 is not folded: the expected output forms the address with
; a sub (#4) and the ld1rw then uses a plain base register.
define <vscale x 4 x i32> @ld1rw_gep_out_of_range_down(i32* %valp) {
; CHECK-LABEL: ld1rw_gep_out_of_range_down:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, #4 // =4
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr i32, i32* %valp, i32 -1
  %elt = load i32, i32* %addr
  %lane0 = insertelement <vscale x 4 x i32> undef, i32 %elt, i32 0
  %splat = shufflevector <vscale x 4 x i32> %lane0, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %splat
}
|
|
|
|
; i32 zero-extended to i64 before the splat: expected as ld1rw into .d lanes.
define <vscale x 2 x i64> @ld1rw_i32_i64_zext(i32* %valp) {
; CHECK-LABEL: ld1rw_i32_i64_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i32, i32* %valp
  %wide = zext i32 %elt to i64
  %lane0 = insertelement <vscale x 2 x i64> undef, i64 %wide, i32 0
  %splat = shufflevector <vscale x 2 x i64> %lane0, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %splat
}
|
|
|
|
; i32 sign-extended to i64 before the splat: expected as ld1rsw into .d lanes.
define <vscale x 2 x i64> @ld1rw_i32_i64_sext(i32* %valp) {
; CHECK-LABEL: ld1rw_i32_i64_sext:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rsw { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i32, i32* %valp
  %wide = sext i32 %elt to i64
  %lane0 = insertelement <vscale x 2 x i64> undef, i64 %wide, i32 0
  %splat = shufflevector <vscale x 2 x i64> %lane0, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %splat
}
|
|
|
|
; Direct i64 load splatted across <vscale x 2 x i64>: expected as one ld1rd from [x0].
define <vscale x 2 x i64> @ld1rd(i64* %valp) {
; CHECK-LABEL: ld1rd:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load i64, i64* %valp
  %lane0 = insertelement <vscale x 2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <vscale x 2 x i64> %lane0, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %splat
}
|
|
|
|
; i64 element index 63: the offset is expected to fold into the ld1rd immediate (#504).
define <vscale x 2 x i64> @ld1rd_gep(i64* %valp) {
; CHECK-LABEL: ld1rd_gep:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0, #504]
; CHECK-NEXT: ret
  %addr = getelementptr i64, i64* %valp, i32 63
  %elt = load i64, i64* %addr
  %lane0 = insertelement <vscale x 2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <vscale x 2 x i64> %lane0, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %splat
}
|
|
|
|
; i64 element index 64 is not folded: the expected output forms the address with
; an add (#512) and the ld1rd then uses a plain base register.
define <vscale x 2 x i64> @ld1rd_gep_out_of_range_up(i64* %valp) {
; CHECK-LABEL: ld1rd_gep_out_of_range_up:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #512 // =512
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr i64, i64* %valp, i32 64
  %elt = load i64, i64* %addr
  %lane0 = insertelement <vscale x 2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <vscale x 2 x i64> %lane0, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %splat
}
|
|
|
|
; i64 element index -1 is not folded: the expected output forms the address with
; a sub (#8) and the ld1rd then uses a plain base register.
define <vscale x 2 x i64> @ld1rd_gep_out_of_range_down(i64* %valp) {
; CHECK-LABEL: ld1rd_gep_out_of_range_down:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, #8 // =8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr i64, i64* %valp, i32 -1
  %elt = load i64, i64* %addr
  %lane0 = insertelement <vscale x 2 x i64> undef, i64 %elt, i32 0
  %splat = shufflevector <vscale x 2 x i64> %lane0, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %splat
}
|
|
|
|
; Direct half load splatted across <vscale x 8 x half>: expected as one ld1rh from [x0].
define <vscale x 8 x half> @ld1rh_half(half* %valp) {
; CHECK-LABEL: ld1rh_half:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load half, half* %valp
  %lane0 = insertelement <vscale x 8 x half> undef, half %elt, i32 0
  %splat = shufflevector <vscale x 8 x half> %lane0, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x half> %splat
}
|
|
|
|
; half element index 63: the offset is expected to fold into the ld1rh immediate (#126).
define <vscale x 8 x half> @ld1rh_half_gep(half* %valp) {
; CHECK-LABEL: ld1rh_half_gep:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0, #126]
; CHECK-NEXT: ret
  %addr = getelementptr half, half* %valp, i32 63
  %elt = load half, half* %addr
  %lane0 = insertelement <vscale x 8 x half> undef, half %elt, i32 0
  %splat = shufflevector <vscale x 8 x half> %lane0, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x half> %splat
}
|
|
|
|
; half element index 64 is not folded: the expected output forms the address with
; an add (#128) and the ld1rh then uses a plain base register.
define <vscale x 8 x half> @ld1rh_half_gep_out_of_range_up(half* %valp) {
; CHECK-LABEL: ld1rh_half_gep_out_of_range_up:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #128 // =128
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr half, half* %valp, i32 64
  %elt = load half, half* %addr
  %lane0 = insertelement <vscale x 8 x half> undef, half %elt, i32 0
  %splat = shufflevector <vscale x 8 x half> %lane0, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x half> %splat
}
|
|
|
|
; half element index -1 is not folded: the expected output forms the address with
; a sub (#2) and the ld1rh then uses a plain base register.
define <vscale x 8 x half> @ld1rh_half_gep_out_of_range_down(half* %valp) {
; CHECK-LABEL: ld1rh_half_gep_out_of_range_down:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, #2 // =2
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr half, half* %valp, i32 -1
  %elt = load half, half* %addr
  %lane0 = insertelement <vscale x 8 x half> undef, half %elt, i32 0
  %splat = shufflevector <vscale x 8 x half> %lane0, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x half> %splat
}
|
|
|
|
; half splat into <vscale x 4 x half>: expected as ld1rh into .s lanes under a
; .s predicate.
define <vscale x 4 x half> @ld1rh_half_unpacked4(half* %valp) {
; CHECK-LABEL: ld1rh_half_unpacked4:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load half, half* %valp
  %lane0 = insertelement <vscale x 4 x half> undef, half %elt, i32 0
  %splat = shufflevector <vscale x 4 x half> %lane0, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x half> %splat
}
|
|
|
|
; <vscale x 4 x half> splat from half element index 63: the offset is expected
; to fold into the ld1rh immediate (#126).
define <vscale x 4 x half> @ld1rh_half_unpacked4_gep(half* %valp) {
; CHECK-LABEL: ld1rh_half_unpacked4_gep:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0, #126]
; CHECK-NEXT: ret
  %addr = getelementptr half, half* %valp, i32 63
  %elt = load half, half* %addr
  %lane0 = insertelement <vscale x 4 x half> undef, half %elt, i32 0
  %splat = shufflevector <vscale x 4 x half> %lane0, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x half> %splat
}
|
|
|
|
; <vscale x 4 x half> splat from half element index 64: not folded, the expected
; output forms the address with an add (#128) before the ld1rh.
define <vscale x 4 x half> @ld1rh_half_unpacked4_gep_out_of_range_up(half* %valp) {
; CHECK-LABEL: ld1rh_half_unpacked4_gep_out_of_range_up:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #128 // =128
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr half, half* %valp, i32 64
  %elt = load half, half* %addr
  %lane0 = insertelement <vscale x 4 x half> undef, half %elt, i32 0
  %splat = shufflevector <vscale x 4 x half> %lane0, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x half> %splat
}
|
|
|
|
; <vscale x 4 x half> splat from half element index -1: not folded, the expected
; output forms the address with a sub (#2) before the ld1rh.
define <vscale x 4 x half> @ld1rh_half_unpacked4_gep_out_of_range_down(half* %valp) {
; CHECK-LABEL: ld1rh_half_unpacked4_gep_out_of_range_down:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, #2 // =2
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr half, half* %valp, i32 -1
  %elt = load half, half* %addr
  %lane0 = insertelement <vscale x 4 x half> undef, half %elt, i32 0
  %splat = shufflevector <vscale x 4 x half> %lane0, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x half> %splat
}
|
|
|
|
; half splat into <vscale x 2 x half>: expected as ld1rh into .d lanes under a
; .d predicate.
define <vscale x 2 x half> @ld1rh_half_unpacked2(half* %valp) {
; CHECK-LABEL: ld1rh_half_unpacked2:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load half, half* %valp
  %lane0 = insertelement <vscale x 2 x half> undef, half %elt, i32 0
  %splat = shufflevector <vscale x 2 x half> %lane0, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x half> %splat
}
|
|
|
|
; <vscale x 2 x half> splat from half element index 63: the offset is expected
; to fold into the ld1rh immediate (#126).
define <vscale x 2 x half> @ld1rh_half_unpacked2_gep(half* %valp) {
; CHECK-LABEL: ld1rh_half_unpacked2_gep:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0, #126]
; CHECK-NEXT: ret
  %addr = getelementptr half, half* %valp, i32 63
  %elt = load half, half* %addr
  %lane0 = insertelement <vscale x 2 x half> undef, half %elt, i32 0
  %splat = shufflevector <vscale x 2 x half> %lane0, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x half> %splat
}
|
|
|
|
; <vscale x 2 x half> splat from half element index 64: not folded, the expected
; output forms the address with an add (#128) before the ld1rh.
define <vscale x 2 x half> @ld1rh_half_unpacked2_gep_out_of_range_up(half* %valp) {
; CHECK-LABEL: ld1rh_half_unpacked2_gep_out_of_range_up:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #128 // =128
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr half, half* %valp, i32 64
  %elt = load half, half* %addr
  %lane0 = insertelement <vscale x 2 x half> undef, half %elt, i32 0
  %splat = shufflevector <vscale x 2 x half> %lane0, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x half> %splat
}
|
|
|
|
; <vscale x 2 x half> splat from half element index -1: not folded, the expected
; output forms the address with a sub (#2) before the ld1rh.
define <vscale x 2 x half> @ld1rh_half_unpacked2_gep_out_of_range_down(half* %valp) {
; CHECK-LABEL: ld1rh_half_unpacked2_gep_out_of_range_down:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, #2 // =2
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr half, half* %valp, i32 -1
  %elt = load half, half* %addr
  %lane0 = insertelement <vscale x 2 x half> undef, half %elt, i32 0
  %splat = shufflevector <vscale x 2 x half> %lane0, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x half> %splat
}
|
|
|
|
; Direct float load splatted across <vscale x 4 x float>: expected as one ld1rw from [x0].
define <vscale x 4 x float> @ld1rw_float(float* %valp) {
; CHECK-LABEL: ld1rw_float:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load float, float* %valp
  %lane0 = insertelement <vscale x 4 x float> undef, float %elt, i32 0
  %splat = shufflevector <vscale x 4 x float> %lane0, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x float> %splat
}
|
|
|
|
; float element index 63: the offset is expected to fold into the ld1rw immediate (#252).
define <vscale x 4 x float> @ld1rw_float_gep(float* %valp) {
; CHECK-LABEL: ld1rw_float_gep:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0, #252]
; CHECK-NEXT: ret
  %addr = getelementptr float, float* %valp, i32 63
  %elt = load float, float* %addr
  %lane0 = insertelement <vscale x 4 x float> undef, float %elt, i32 0
  %splat = shufflevector <vscale x 4 x float> %lane0, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x float> %splat
}
|
|
|
|
; float element index 64 is not folded: the expected output forms the address
; with an add (#256) and the ld1rw then uses a plain base register.
define <vscale x 4 x float> @ld1rw_float_gep_out_of_range_up(float* %valp) {
; CHECK-LABEL: ld1rw_float_gep_out_of_range_up:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #256 // =256
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr float, float* %valp, i32 64
  %elt = load float, float* %addr
  %lane0 = insertelement <vscale x 4 x float> undef, float %elt, i32 0
  %splat = shufflevector <vscale x 4 x float> %lane0, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x float> %splat
}
|
|
|
|
; float element index -1 is not folded: the expected output forms the address
; with a sub (#4) and the ld1rw then uses a plain base register.
define <vscale x 4 x float> @ld1rw_float_gep_out_of_range_down(float* %valp) {
; CHECK-LABEL: ld1rw_float_gep_out_of_range_down:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, #4 // =4
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr float, float* %valp, i32 -1
  %elt = load float, float* %addr
  %lane0 = insertelement <vscale x 4 x float> undef, float %elt, i32 0
  %splat = shufflevector <vscale x 4 x float> %lane0, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x float> %splat
}
|
|
|
|
; float splat into <vscale x 2 x float>: expected as ld1rw into .d lanes under a
; .d predicate.
define <vscale x 2 x float> @ld1rw_float_unpacked2(float* %valp) {
; CHECK-LABEL: ld1rw_float_unpacked2:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load float, float* %valp
  %lane0 = insertelement <vscale x 2 x float> undef, float %elt, i32 0
  %splat = shufflevector <vscale x 2 x float> %lane0, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x float> %splat
}
|
|
|
|
; <vscale x 2 x float> splat from float element index 63: the offset is expected
; to fold into the ld1rw immediate (#252).
define <vscale x 2 x float> @ld1rw_float_unpacked2_gep(float* %valp) {
; CHECK-LABEL: ld1rw_float_unpacked2_gep:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0, #252]
; CHECK-NEXT: ret
  %addr = getelementptr float, float* %valp, i32 63
  %elt = load float, float* %addr
  %lane0 = insertelement <vscale x 2 x float> undef, float %elt, i32 0
  %splat = shufflevector <vscale x 2 x float> %lane0, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x float> %splat
}
|
|
|
|
; <vscale x 2 x float> splat from float element index 64: not folded, the
; expected output forms the address with an add (#256) before the ld1rw.
define <vscale x 2 x float> @ld1rw_float_unpacked2_gep_out_of_range_up(float* %valp) {
; CHECK-LABEL: ld1rw_float_unpacked2_gep_out_of_range_up:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #256 // =256
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr float, float* %valp, i32 64
  %elt = load float, float* %addr
  %lane0 = insertelement <vscale x 2 x float> undef, float %elt, i32 0
  %splat = shufflevector <vscale x 2 x float> %lane0, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x float> %splat
}
|
|
|
|
; <vscale x 2 x float> splat from float element index -1: not folded, the
; expected output forms the address with a sub (#4) before the ld1rw.
define <vscale x 2 x float> @ld1rw_float_unpacked2_gep_out_of_range_down(float* %valp) {
; CHECK-LABEL: ld1rw_float_unpacked2_gep_out_of_range_down:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, #4 // =4
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr float, float* %valp, i32 -1
  %elt = load float, float* %addr
  %lane0 = insertelement <vscale x 2 x float> undef, float %elt, i32 0
  %splat = shufflevector <vscale x 2 x float> %lane0, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x float> %splat
}
|
|
|
|
; Direct double load splatted across <vscale x 2 x double>: expected as one ld1rd from [x0].
define <vscale x 2 x double> @ld1rd_double(double* %valp) {
; CHECK-LABEL: ld1rd_double:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %elt = load double, double* %valp
  %lane0 = insertelement <vscale x 2 x double> undef, double %elt, i32 0
  %splat = shufflevector <vscale x 2 x double> %lane0, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x double> %splat
}
|
|
|
|
; double element index 63: the offset is expected to fold into the ld1rd immediate (#504).
define <vscale x 2 x double> @ld1rd_double_gep(double* %valp) {
; CHECK-LABEL: ld1rd_double_gep:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0, #504]
; CHECK-NEXT: ret
  %addr = getelementptr double, double* %valp, i32 63
  %elt = load double, double* %addr
  %lane0 = insertelement <vscale x 2 x double> undef, double %elt, i32 0
  %splat = shufflevector <vscale x 2 x double> %lane0, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x double> %splat
}
|
|
|
|
; double element index 64 is not folded: the expected output forms the address
; with an add (#512) and the ld1rd then uses a plain base register.
define <vscale x 2 x double> @ld1rd_double_gep_out_of_range_up(double* %valp) {
; CHECK-LABEL: ld1rd_double_gep_out_of_range_up:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, #512 // =512
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr double, double* %valp, i32 64
  %elt = load double, double* %addr
  %lane0 = insertelement <vscale x 2 x double> undef, double %elt, i32 0
  %splat = shufflevector <vscale x 2 x double> %lane0, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x double> %splat
}
|
|
|
|
; double element index -1 is not folded: the expected output forms the address
; with a sub (#8) and the ld1rd then uses a plain base register.
define <vscale x 2 x double> @ld1rd_double_gep_out_of_range_down(double* %valp) {
; CHECK-LABEL: ld1rd_double_gep_out_of_range_down:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, #8 // =8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
; CHECK-NEXT: ret
  %addr = getelementptr double, double* %valp, i32 -1
  %elt = load double, double* %addr
  %lane0 = insertelement <vscale x 2 x double> undef, double %elt, i32 0
  %splat = shufflevector <vscale x 2 x double> %lane0, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x double> %splat
}
|