From 19dfc4f3fc3fcb0b625033b459cbe9d180a84823 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 15 Mar 2021 14:56:20 +0000 Subject: [PATCH] [AArch64] Zero extended extract_vector_elt pattern This adds patterns for i64 zext_inreg(i32 extract_vector_elt X), producing a single UMOVvi8 or UMOVvi16 instruction that is already expected to clear the top bits. The exact patterns that this matches are and(anyext(vector_extract X, lane), 0xff) for v16i8 sources and and(anyext(vector_extract X, lane), 0xffff) for v8i16 sources, similar to the sext patterns higher up in the same file. Differential Revision: https://reviews.llvm.org/D98599 --- lib/Target/AArch64/AArch64InstrInfo.td | 7 +++++ test/CodeGen/AArch64/build-vector-extract.ll | 32 -------------------- 2 files changed, 7 insertions(+), 32 deletions(-) diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 89c7234485f..26a88aa597f 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -5329,6 +5329,13 @@ def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), (i32 0xffff)), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>; +def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), + VectorIndexB:$idx)))), (i64 0xff))), + (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>; +def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), + VectorIndexH:$idx)))), (i64 0xffff))), + (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>; + defm INS : SIMDIns; def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), diff --git a/test/CodeGen/AArch64/build-vector-extract.ll b/test/CodeGen/AArch64/build-vector-extract.ll index b57148f2a92..2c263d7f5b7 100644 --- a/test/CodeGen/AArch64/build-vector-extract.ll +++ b/test/CodeGen/AArch64/build-vector-extract.ll @@ -208,7 +208,6 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_undef(<8 x i16> %x) { ; CHECK-LABEL: extract0_i16_zext_insert0_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[0] -; 
CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 0 @@ -221,7 +220,6 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret @@ -235,7 +233,6 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) { ; CHECK-LABEL: extract1_i16_zext_insert0_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 1 @@ -248,7 +245,6 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret @@ -262,7 +258,6 @@ define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) { ; CHECK-LABEL: extract2_i16_zext_insert0_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 2 @@ -275,7 +270,6 @@ define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret @@ -289,7 +283,6 @@ define <2 x i64> @extract3_i16_zext_insert0_i64_undef(<8 x i16> %x) { ; CHECK-LABEL: extract3_i16_zext_insert0_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 3 
@@ -302,7 +295,6 @@ define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret @@ -316,7 +308,6 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) { ; CHECK-LABEL: extract0_i16_zext_insert1_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: dup v0.2d, x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 0 @@ -329,7 +320,6 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret @@ -343,7 +333,6 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) { ; CHECK-LABEL: extract1_i16_zext_insert1_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: dup v0.2d, x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 1 @@ -356,7 +345,6 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret @@ -370,7 +358,6 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) { ; CHECK-LABEL: extract2_i16_zext_insert1_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: dup v0.2d, x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 2 @@ -383,7 +370,6 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: 
extract2_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret @@ -397,7 +383,6 @@ define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) { ; CHECK-LABEL: extract3_i16_zext_insert1_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: dup v0.2d, x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 3 @@ -410,7 +395,6 @@ define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: and x8, x8, #0xffff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret @@ -426,7 +410,6 @@ define <2 x i64> @extract0_i8_zext_insert0_i64_undef(<16 x i8> %x) { ; CHECK-LABEL: extract0_i8_zext_insert0_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 0 @@ -439,7 +422,6 @@ define <2 x i64> @extract0_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract0_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret @@ -453,7 +435,6 @@ define <2 x i64> @extract1_i8_zext_insert0_i64_undef(<16 x i8> %x) { ; CHECK-LABEL: extract1_i8_zext_insert0_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 1 @@ -466,7 +447,6 @@ define <2 x i64> @extract1_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract1_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: and x8, x8, #0xff ; 
CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret @@ -480,7 +460,6 @@ define <2 x i64> @extract2_i8_zext_insert0_i64_undef(<16 x i8> %x) { ; CHECK-LABEL: extract2_i8_zext_insert0_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 2 @@ -493,7 +472,6 @@ define <2 x i64> @extract2_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract2_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret @@ -507,7 +485,6 @@ define <2 x i64> @extract3_i8_zext_insert0_i64_undef(<16 x i8> %x) { ; CHECK-LABEL: extract3_i8_zext_insert0_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 3 @@ -520,7 +497,6 @@ define <2 x i64> @extract3_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract3_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret @@ -534,7 +510,6 @@ define <2 x i64> @extract0_i8_zext_insert1_i64_undef(<16 x i8> %x) { ; CHECK-LABEL: extract0_i8_zext_insert1_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: dup v0.2d, x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 0 @@ -547,7 +522,6 @@ define <2 x i64> @extract0_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract0_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret @@ -561,7 +535,6 @@ define <2 x i64> 
@extract1_i8_zext_insert1_i64_undef(<16 x i8> %x) { ; CHECK-LABEL: extract1_i8_zext_insert1_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: dup v0.2d, x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 1 @@ -574,7 +547,6 @@ define <2 x i64> @extract1_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract1_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret @@ -588,7 +560,6 @@ define <2 x i64> @extract2_i8_zext_insert1_i64_undef(<16 x i8> %x) { ; CHECK-LABEL: extract2_i8_zext_insert1_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: dup v0.2d, x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 2 @@ -601,7 +572,6 @@ define <2 x i64> @extract2_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract2_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret @@ -615,7 +585,6 @@ define <2 x i64> @extract3_i8_zext_insert1_i64_undef(<16 x i8> %x) { ; CHECK-LABEL: extract3_i8_zext_insert1_i64_undef: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: dup v0.2d, x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 3 @@ -628,7 +597,6 @@ define <2 x i64> @extract3_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract3_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: and x8, x8, #0xff ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret