From f2f14a2d434c3d7f378432b1f5c1b9e750a4da5a Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 7 Feb 2014 00:16:33 +0000 Subject: [PATCH] X86: Resolve a long-standing FIXME and properly isel pextr[bw]. Generalize the AArch64 .td nodes for AssertZext and AssertSext. Use them to match the relevant pextr store instructions. The test widen_load-2.ll requires a slight change because with the stores gone, the remaining instructions are scheduled in a different order. Add test cases for SSE4 and AVX variants. Resolves rdar://13414672. Patch by Adam Nemet. llvm-svn: 200957 --- include/llvm/Target/TargetSelectionDAG.td | 6 ++++++ lib/Target/AArch64/AArch64InstrNEON.td | 5 ----- lib/Target/X86/README-SSE.txt | 5 ----- lib/Target/X86/X86InstrSSE.td | 12 ++++-------- test/CodeGen/X86/extract-store.ll | 22 ++++++++++++++++++++++ test/CodeGen/X86/widen_load-2.ll | 2 +- 6 files changed, 33 insertions(+), 19 deletions(-) create mode 100644 test/CodeGen/X86/extract-store.ll diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index d94bdc67bf0..18ff04c079a 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -492,6 +492,12 @@ def intrinsic_wo_chain : SDNode<"ISD::INTRINSIC_WO_CHAIN", // Do not use cvt directly. 
Use cvt forms below def cvt : SDNode<"ISD::CONVERT_RNDSAT", SDTConvertOp>; +def SDT_assertext : SDTypeProfile<1, 1, + [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>; +def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>; +def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>; + + //===----------------------------------------------------------------------===// // Selection DAG Condition Codes diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 1b7e0f93b95..68a499b7533 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -64,11 +64,6 @@ def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>; -def SDT_assertext : SDTypeProfile<1, 1, - [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>; -def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>; -def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>; - //===----------------------------------------------------------------------===// // Addressing-mode instantiations //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index adfa7fa1232..71329b06692 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -494,11 +494,6 @@ is memory. //===---------------------------------------------------------------------===// -SSE4 extract-to-mem ops aren't being pattern matched because of the AssertZext -sitting between the truncate and the extract. - -//===---------------------------------------------------------------------===// - INSERTPS can match any insert (extract, imm1), imm2 for 4 x float, and insert any number of 0.0 simultaneously. Currently we only use it for simple insertions. 
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 34b3b855bf1..ea91b5b1042 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -6210,10 +6210,8 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>; -// FIXME: -// There's an AssertZext in the way of writing the store pattern -// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) + [(store (i8 (trunc (assertzext (X86pextrb (v16i8 VR128:$src1), + imm:$src2)))), addr:$dst)]>; } let Predicates = [HasAVX] in @@ -6236,10 +6234,8 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> { (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>; -// FIXME: -// There's an AssertZext in the way of writing the store pattern -// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst) + [(store (i16 (trunc (assertzext (X86pextrw (v8i16 VR128:$src1), + imm:$src2)))), addr:$dst)]>; } let Predicates = [HasAVX] in diff --git a/test/CodeGen/X86/extract-store.ll b/test/CodeGen/X86/extract-store.ll new file mode 100644 index 00000000000..27d93804ba6 --- /dev/null +++ b/test/CodeGen/X86/extract-store.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse4.1 | FileCheck %s -check-prefix=SSE41 +; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX + +define void @pextrb(i8* nocapture %dst, <16 x i8> %foo) { +; AVX: vpextrb +; SSE41: pextrb +; AVX-NOT: movb +; SSE41-NOT: movb + %vecext = extractelement <16 x i8> %foo, i32 15 + store i8 %vecext, i8* %dst, align 1 + ret void +} + +define void @pextrw(i16* nocapture %dst, <8 x i16> %foo) { +; AVX: vpextrw +; SSE41: pextrw +; AVX-NOT: movw +; SSE41-NOT: movw + %vecext = extractelement <8 x i16> %foo, i32 7 + store i16 %vecext, i16* %dst, align 
1 + ret void +} diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll index 26815a422ec..41bea859f47 100644 --- a/test/CodeGen/X86/widen_load-2.ll +++ b/test/CodeGen/X86/widen_load-2.ll @@ -149,9 +149,9 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp ; CHECK: movdqa ; CHECK: paddb ; CHECK: paddb -; CHECK: movq ; CHECK: pextrb ; CHECK: pextrw +; CHECK: movq ; CHECK: ret %a = load %i8vec31* %ap, align 16 %b = load %i8vec31* %bp, align 16