diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 198260d7c47..7e7ac6ddcc3 100644
--- a/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2258,12 +2258,30 @@ let Predicates = [HasSVE] in {
                         GPR64:$src)>;
 
   // Insert FP scalar into vector with scalar index
+  def : Pat<(nxv2f16 (vector_insert (nxv2f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)),
+            (CPY_ZPmV_H ZPR:$vec,
+                        (CMPEQ_PPzZZ_D (PTRUE_D 31),
+                                       (INDEX_II_D 0, 1),
+                                       (DUP_ZR_D GPR64:$index)),
+                        $src)>;
+  def : Pat<(nxv4f16 (vector_insert (nxv4f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)),
+            (CPY_ZPmV_H ZPR:$vec,
+                        (CMPEQ_PPzZZ_S (PTRUE_S 31),
+                                       (INDEX_II_S 0, 1),
+                                       (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+                        $src)>;
   def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)),
             (CPY_ZPmV_H ZPR:$vec,
                         (CMPEQ_PPzZZ_H (PTRUE_H 31),
                                        (INDEX_II_H 0, 1),
                                        (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
                         $src)>;
+  def : Pat<(nxv2f32 (vector_insert (nxv2f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)),
+            (CPY_ZPmV_S ZPR:$vec,
+                        (CMPEQ_PPzZZ_D (PTRUE_D 31),
+                                       (INDEX_II_D 0, 1),
+                                       (DUP_ZR_D GPR64:$index)),
+                        $src)>;
   def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)),
             (CPY_ZPmV_S ZPR:$vec,
                         (CMPEQ_PPzZZ_S (PTRUE_S 31),
diff --git a/test/CodeGen/AArch64/sve-insert-element.ll b/test/CodeGen/AArch64/sve-insert-element.ll
index c6250ff5f16..da56ae9ba02 100644
--- a/test/CodeGen/AArch64/sve-insert-element.ll
+++ b/test/CodeGen/AArch64/sve-insert-element.ll
@@ -273,3 +273,82 @@ define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
   %b = insertelement <vscale x 2 x double> undef, double %a, i32 0
   ret <vscale x 2 x double> %b
 }
+
+; Insert scalar at index
+define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: index z2.d, #0, #1
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+  %res = insertelement <vscale x 2 x half> undef, half %h, i64 %idx
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x half> @test_insert_with_index_nxv4f16(half %h, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.s, w0
+; CHECK-NEXT: index z2.s, #0, #1
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z1.s
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+  %res = insertelement <vscale x 4 x half> undef, half %h, i64 %idx
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.h, w0
+; CHECK-NEXT: index z2.h, #0, #1
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z1.h
+; CHECK-NEXT: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+  %res = insertelement <vscale x 8 x half> undef, half %h, i64 %idx
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: index z2.d, #0, #1
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT: mov z0.s, p0/m, s0
+; CHECK-NEXT: ret
+  %res = insertelement <vscale x 2 x float> undef, float %f, i64 %idx
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 4 x float> @test_insert_with_index_nxv4f32(float %f, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.s, w0
+; CHECK-NEXT: index z2.s, #0, #1
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z1.s
+; CHECK-NEXT: mov z0.s, p0/m, s0
+; CHECK-NEXT: ret
+  %res = insertelement <vscale x 4 x float> undef, float %f, i64 %idx
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @test_insert_with_index_nxv2f64(double %d, i64 %idx) {
+; CHECK-LABEL: test_insert_with_index_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z1.d, x0
+; CHECK-NEXT: index z2.d, #0, #1
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z1.d
+; CHECK-NEXT: mov z0.d, p0/m, d0
+; CHECK-NEXT: ret
+  %res = insertelement <vscale x 2 x double> undef, double %d, i64 %idx
+  ret <vscale x 2 x double> %res
+}