From 24c6842d698158cb373b2f54e259b2d550f0532a Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Tue, 27 Aug 2013 10:31:36 +0000
Subject: [PATCH] ARM: add natural patterns for vaddhn and vsubhn.

These instructions aren't particularly complicated and it's well worth having
patterns for some reasonably useful LLVM IR that will match them. Soon we should
be able to switch Clang over to producing this natural version.

llvm-svn: 189335
---
 lib/Target/ARM/ARMInstrNEON.td | 14 ++++++++++++++
 test/CodeGen/ARM/vadd.ll       | 27 +++++++++++++++++++++++++++
 test/CodeGen/ARM/vsub.ll       | 27 +++++++++++++++++++++++++++
 3 files changed, 68 insertions(+)

diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 59dea48b6ab..e86e658d9d8 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -3979,6 +3979,13 @@ defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
 defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                             int_arm_neon_vraddhn, 1>;
 
+def : Pat<(v8i8  (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
+
 // Vector Multiply Operations.
 
 //   VMUL     : Vector Multiply (integer, polynomial and floating-point)
@@ -4262,6 +4269,13 @@ defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
 defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                             int_arm_neon_vrsubhn, 0>;
 
+def : Pat<(v8i8  (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
+
 // Vector Comparisons.
 
 //   VCEQ     : Vector Compare Equal
diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll
index a1ad37b5f8b..c2c16aa132f 100644
--- a/test/CodeGen/ARM/vadd.ll
+++ b/test/CodeGen/ARM/vadd.ll
@@ -152,6 +152,33 @@ declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind rea
 declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
 
+define <8 x i8> @vaddhni16_natural(<8 x i16> %A, <8 x i16> %B) nounwind {
+; CHECK-LABEL: vaddhni16_natural:
+; CHECK: vaddhn.i16
+  %sum = add <8 x i16> %A, %B
+  %shift = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %trunc = trunc <8 x i16> %shift to <8 x i8>
+  ret <8 x i8> %trunc
+}
+
+define <4 x i16> @vaddhni32_natural(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK-LABEL: vaddhni32_natural:
+; CHECK: vaddhn.i32
+  %sum = add <4 x i32> %A, %B
+  %shift = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
+  %trunc = trunc <4 x i32> %shift to <4 x i16>
+  ret <4 x i16> %trunc
+}
+
+define <2 x i32> @vaddhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK-LABEL: vaddhni64_natural:
+; CHECK: vaddhn.i64
+  %sum = add <2 x i64> %A, %B
+  %shift = lshr <2 x i64> %sum, <i64 32, i64 32>
+  %trunc = trunc <2 x i64> %shift to <2 x i32>
+  ret <2 x i32> %trunc
+}
+
 define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vaddls8:
 ;CHECK: vaddl.s8
diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll
index 89c3095ab2b..8ed8d42459b 100644
--- a/test/CodeGen/ARM/vsub.ll
+++ b/test/CodeGen/ARM/vsub.ll
@@ -121,6 +121,33 @@ declare <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind read
 declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
 
+define <8 x i8> @vsubhni16_natural(<8 x i16> %A, <8 x i16> %B) nounwind {
+; CHECK-LABEL: vsubhni16_natural:
+; CHECK: vsubhn.i16
+  %sum = sub <8 x i16> %A, %B
+  %shift = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %trunc = trunc <8 x i16> %shift to <8 x i8>
+  ret <8 x i8> %trunc
+}
+
+define <4 x i16> @vsubhni32_natural(<4 x i32> %A, <4 x i32> %B) nounwind {
+; CHECK-LABEL: vsubhni32_natural:
+; CHECK: vsubhn.i32
+  %sum = sub <4 x i32> %A, %B
+  %shift = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
+  %trunc = trunc <4 x i32> %shift to <4 x i16>
+  ret <4 x i16> %trunc
+}
+
+define <2 x i32> @vsubhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK-LABEL: vsubhni64_natural:
+; CHECK: vsubhn.i64
+  %sum = sub <2 x i64> %A, %B
+  %shift = lshr <2 x i64> %sum, <i64 32, i64 32>
+  %trunc = trunc <2 x i64> %shift to <2 x i32>
+  ret <2 x i32> %trunc
+}
+
 define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrsubhni16:
 ;CHECK: vrsubhn.i16