From 729989c981129566ec193e57001de75103b47cf3 Mon Sep 17 00:00:00 2001
From: Francesco Petrogalli
Date: Fri, 19 Jun 2020 03:31:01 +0000
Subject: [PATCH] [llvm][SVE] Reg + reg addressing mode for LD1RO.

Reviewers: efriedma, sdesmalen

Reviewed By: efriedma

Subscribers: tschuett, hiraditya, rkruppe, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80741
---
 lib/Target/AArch64/AArch64SVEInstrInfo.td     |   8 +-
 lib/Target/AArch64/SVEInstrFormats.td         |   6 +-
 ...ntrinsics-ld1ro-addressing-mode-reg-reg.ll | 102 ++++++++++++++++++
 3 files changed, 111 insertions(+), 5 deletions(-)
 create mode 100644 test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll

diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td
index d6e52f9703d..9bab0e4d33e 100644
--- a/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1960,10 +1960,10 @@ let Predicates = [HasSVE, HasMatMulFP64] in {
   defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro>;
   defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro>;
   defm LD1RO_D_IMM : sve_mem_ldor_si<0b11, "ld1rod", Z_d, ZPR64, nxv2i64, nxv2i1, AArch64ld1ro>;
-  defm LD1RO_B : sve_mem_ldor_ss<0b00, "ld1rob", Z_b, ZPR8,  GPR64NoXZRshifted8>;
-  defm LD1RO_H : sve_mem_ldor_ss<0b01, "ld1roh", Z_h, ZPR16, GPR64NoXZRshifted16>;
-  defm LD1RO_W : sve_mem_ldor_ss<0b10, "ld1row", Z_s, ZPR32, GPR64NoXZRshifted32>;
-  defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64>;
+  defm LD1RO_B : sve_mem_ldor_ss<0b00, "ld1rob", Z_b, ZPR8,  GPR64NoXZRshifted8,  nxv16i8, nxv16i1, AArch64ld1ro, am_sve_regreg_lsl0>;
+  defm LD1RO_H : sve_mem_ldor_ss<0b01, "ld1roh", Z_h, ZPR16, GPR64NoXZRshifted16, nxv8i16, nxv8i1,  AArch64ld1ro, am_sve_regreg_lsl1>;
+  defm LD1RO_W : sve_mem_ldor_ss<0b10, "ld1row", Z_s, ZPR32, GPR64NoXZRshifted32, nxv4i32, nxv4i1,  AArch64ld1ro, am_sve_regreg_lsl2>;
+  defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1,  AArch64ld1ro, am_sve_regreg_lsl3>;
   defm ZIP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 0, "zip1", int_aarch64_sve_zip1q>;
   defm ZIP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 1, "zip2", int_aarch64_sve_zip2q>;
   defm UZP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 0, "uzp1", int_aarch64_sve_uzp1q>;
diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td
index bde17b39356..01e66961cb8 100644
--- a/lib/Target/AArch64/SVEInstrFormats.td
+++ b/lib/Target/AArch64/SVEInstrFormats.td
@@ -7699,11 +7699,15 @@ class sve_mem_ldor_ss<bits<2> sz, string asm, RegisterOperand VecList,
 }
 
 multiclass sve_mem_ldor_ss<bits<2> sz, string asm, RegisterOperand listty,
-                           ZPRRegOp zprty, RegisterOperand gprty> {
+                           ZPRRegOp zprty, RegisterOperand gprty, ValueType Ty,
+                           ValueType PredTy, SDNode Ld1ro, ComplexPattern AddrCP> {
   def NAME : sve_mem_ldor_ss<sz, asm, listty, gprty>;
 
   def : InstAlias<asm # "\t$Zt, $Pg/z, [$Rn, $Rm]",
                   (!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
+
+  def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), (AddrCP GPR64sp:$base, gprty:$offset))),
+            (!cast<Instruction>(NAME) PPR3bAny:$gp, GPR64sp:$base, gprty:$offset)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll b/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll
new file mode 100644
index 00000000000..b4ac587c0b7
--- /dev/null
+++ b/test/CodeGen/AArch64/sve-intrinsics-ld1ro-addressing-mode-reg-reg.ll
@@ -0,0 +1,102 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+f64mm,+bf16 -asm-verbose=0 < %s | FileCheck %s
+
+;
+; LD1ROB
+;
+
+define <vscale x 16 x i8> @ld1rob_i8(<vscale x 16 x i1> %pg, i8* %a, i64 %index) nounwind {
+; CHECK-LABEL: ld1rob_i8:
+; CHECK-NEXT: ld1rob { z0.b }, p0/z, [x0, x1]
+; CHECK-NEXT: ret
+  %base = getelementptr i8, i8* %a, i64 %index
+  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1> %pg, i8* %base)
+  ret <vscale x 16 x i8> %load
+}
+
+;
+; LD1ROH
+;
+
+define <vscale x 8 x i16> @ld1roh_i16(<vscale x 8 x i1> %pg, i16* %a, i64 %index) nounwind {
+; CHECK-LABEL: ld1roh_i16:
+; CHECK-NEXT: ld1roh { z0.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+  %base = getelementptr i16, i16* %a, i64 %index
+  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1ro.nxv8i16(<vscale x 8 x i1> %pg, i16* %base)
+  ret <vscale x 8 x i16> %load
+}
+
+define <vscale x 8 x half> @ld1roh_f16(<vscale x 8 x i1> %pg, half* %a, i64 %index) nounwind {
+; CHECK-LABEL: ld1roh_f16:
+; CHECK-NEXT: ld1roh { z0.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+  %base = getelementptr half, half* %a, i64 %index
+  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ld1ro.nxv8f16(<vscale x 8 x i1> %pg, half* %base)
+  ret <vscale x 8 x half> %load
+}
+
+; bfloat - requires -mattr=+bf16
+define <vscale x 8 x bfloat> @ld1roh_bf16(<vscale x 8 x i1> %pg, bfloat* %a, i64 %index) nounwind {
+; CHECK-LABEL: ld1roh_bf16:
+; CHECK-NEXT: ld1roh { z0.h }, p0/z, [x0, x1, lsl #1]
+; CHECK-NEXT: ret
+  %base = getelementptr bfloat, bfloat* %a, i64 %index
+  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1ro.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %base)
+  ret <vscale x 8 x bfloat> %load
+}
+
+;
+; LD1ROW
+;
+
+define <vscale x 4 x i32> @ld1row_i32(<vscale x 4 x i1> %pg, i32* %a, i64 %index) nounwind {
+; CHECK-LABEL: ld1row_i32:
+; CHECK-NEXT: ld1row { z0.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT: ret
+  %base = getelementptr i32, i32* %a, i64 %index
+  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1ro.nxv4i32(<vscale x 4 x i1> %pg, i32* %base)
+  ret <vscale x 4 x i32> %load
+}
+
+define <vscale x 4 x float> @ld1row_f32(<vscale x 4 x i1> %pg, float* %a, i64 %index) nounwind {
+; CHECK-LABEL: ld1row_f32:
+; CHECK-NEXT: ld1row { z0.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT: ret
+  %base = getelementptr float, float* %a, i64 %index
+  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1ro.nxv4f32(<vscale x 4 x i1> %pg, float* %base)
+  ret <vscale x 4 x float> %load
+}
+
+;
+; LD1ROD
+;
+
+define <vscale x 2 x i64> @ld1rod_i64(<vscale x 2 x i1> %pg, i64* %a, i64 %index) nounwind {
+; CHECK-LABEL: ld1rod_i64:
+; CHECK-NEXT: ld1rod { z0.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT: ret
+  %base = getelementptr i64, i64* %a, i64 %index
+  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1ro.nxv2i64(<vscale x 2 x i1> %pg, i64* %base)
+  ret <vscale x 2 x i64> %load
+}
+
+define <vscale x 2 x double> @ld1rod_f64(<vscale x 2 x i1> %pg, double* %a, i64 %index) nounwind {
+; CHECK-LABEL: ld1rod_f64:
+; CHECK-NEXT: ld1rod { z0.d }, p0/z, [x0, x1, lsl #3]
+; CHECK-NEXT: ret
+  %base = getelementptr double, double* %a, i64 %index
+  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1> %pg, double* %base)
+  ret <vscale x 2 x double> %load
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1ro.nxv16i8(<vscale x 16 x i1>, i8*)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1ro.nxv8i16(<vscale x 8 x i1>, i16*)
+declare <vscale x 8 x half> @llvm.aarch64.sve.ld1ro.nxv8f16(<vscale x 8 x i1>, half*)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1ro.nxv8bf16(<vscale x 8 x i1>, bfloat*)
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1ro.nxv4i32(<vscale x 4 x i1>, i32*)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ld1ro.nxv4f32(<vscale x 4 x i1>, float*)
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1ro.nxv2i64(<vscale x 2 x i1>, i64*)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ld1ro.nxv2f64(<vscale x 2 x i1>, double*)
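
Usage sketch: the new Pat folds a base-plus-scaled-index address feeding the
ld1ro intrinsic into the instruction's reg+reg addressing mode, rather than
materialising the address with a separate add. A minimal C-level illustration
of where such addresses arise, assuming the ACLE intrinsic svld1ro_s16 from
arm_sve.h (available under the FP64 matrix-multiply extension, e.g. compiled
with -march=armv8.6-a+sve+f64mm); the function name and parameters below are
illustrative, not part of this patch:

    #include <arm_sve.h>

    // Load a 256-bit (octaword) block of 16-bit elements starting at
    // base[index] and replicate it to every 256-bit granule of the
    // vector. The base + (index << 1) address computation is expected
    // to fold into the reg+reg form selected by this patch:
    //   ld1roh { z0.h }, p0/z, [x0, x1, lsl #1]
    svint16_t load_rep256(svbool_t pg, const int16_t *base, int64_t index) {
      return svld1ro_s16(pg, base + index);
    }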