From 1eb8234c85e2d132bd95e7f4834fe79476fdc059 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Sat, 29 Aug 2020 17:07:48 -0500 Subject: [PATCH] [Hexagon] Fix perfect shuffle generation for single vectors Perfect shuffle instructions (vdealvdd/vshuffvdd) work on vector pairs. When given a single input vector, half of it first needs to be transposed into the other vector before the generated shuffles can take effect. Also, the first transpose needs to be undone at the end (this last step was missing). --- lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 22 +++++++++++++++++-- .../Hexagon/autohvx/isel-shuff-single.ll | 22 +++++++++++++++++++ .../Hexagon/isel-hvx-pred-bitcast-order.ll | 2 +- 3 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/Hexagon/autohvx/isel-shuff-single.ll diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index c0f92042e5d..f422057c1c9 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -1895,16 +1895,34 @@ OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) { } } + // From the cycles, construct the sequence of values that will + // then form the control values for vdealvdd/vshuffvdd, i.e. + // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5 + // This essentially strips the M value from the cycles where + // it's present, and performs the insertion of M (then stripping) + // for cycles without M (as described in an earlier comment). SmallVector SwapElems; - if (HwLen == unsigned(VecLen)) + // When the input is extended (i.e. single vector becomes a pair), + // this is done by using an "undef" vector as the second input. + // However, then we get + // input 1: GOODBITS + // input 2: ........ + // but we need + // input 1: ....BITS + // input 2: ....GOOD + // Then at the end, this needs to be undone. 
To accomplish this, + // artificially add "LogLen-1" at both ends of the sequence. + if (Extend) SwapElems.push_back(LogLen-1); - for (const CycleType &C : Cycles) { + // Do the transformation: (a1..an) -> (M a1..an)(M a1). unsigned First = (C[0] == LogLen-1) ? 1 : 0; SwapElems.append(C.begin()+First, C.end()); if (First == 0) SwapElems.push_back(C[0]); } + if (Extend) + SwapElems.push_back(LogLen-1); const SDLoc &dl(Results.InpNode); OpRef Arg = !Extend ? Va diff --git a/test/CodeGen/Hexagon/autohvx/isel-shuff-single.ll b/test/CodeGen/Hexagon/autohvx/isel-shuff-single.ll new file mode 100644 index 00000000000..9ef4543790b --- /dev/null +++ b/test/CodeGen/Hexagon/autohvx/isel-shuff-single.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Perfect shuffle with single input vector. Half of it first needs to be +; transposed into the other vector before the generated shuffles can take +; effect, lastly the first transpose needs to be undone (this last step +; was missing). 
+ +; CHECK-LABEL: f0: +; CHECK-DAG: r[[R0:[0-9]+]] = #66 +; CHECK-DAG: r[[R1:[0-9]+]] = #40 +; CHECK-DAG: r[[R2:[0-9]+]] = #85 +; CHECK: v1:0 = vdeal(v{{[0-9]+}},v0,r[[R0]]) +; CHECK: v1:0 = vshuff(v1,v0,r[[R1]]) +; CHECK: v1:0 = vshuff(v1,v0,r[[R2]]) +; CHECK-NOT: = v + +define <128 x i8> @f0(<128 x i8> %a0) #0 { + %v0 = shufflevector <128 x i8> %a0, <128 x i8> undef, <128 x i32> + ret <128 x i8> %v0 +} + +attributes #0 = { nounwind readnone "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b,-packets" } diff --git a/test/CodeGen/Hexagon/isel-hvx-pred-bitcast-order.ll b/test/CodeGen/Hexagon/isel-hvx-pred-bitcast-order.ll index fd3f84916c5..1fdf8a5fb0e 100644 --- a/test/CodeGen/Hexagon/isel-hvx-pred-bitcast-order.ll +++ b/test/CodeGen/Hexagon/isel-hvx-pred-bitcast-order.ll @@ -4,7 +4,7 @@ ; CHECK: vdeal ; CHECK: vdeal -; CHECK: v[[V1:[0-9]+]]:[[V0:[0-9]+]] = vdeal +; CHECK: v[[V1:[0-9]+]]:[[V0:[0-9]+]] = vshuff ; CHECK-NEXT: } ; CHECK-NEXT: { ; CHECK-NEXT: vmem(r[[RA:[0-9]+]]+#0) = v[[V0]]