[Hexagon] Replace incorrect pattern for vpackl HWI32 -> HVi8

V6_vdealb4w is not correct for pairs; use V6_vpackeh/V6_vpackeb instead.
Krzysztof Parzyszek 2020-09-15 20:32:09 -05:00
parent 5540a2de5d
commit 718a375cea
3 changed files with 8 additions and 5 deletions
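For context: vpackl is the node a vector truncate gets legalized to (see the test comment in the second hunk below), and as I read it, it keeps the low part of every source element, in element order, leaving the remaining output lanes undefined. Below is a minimal C sketch of why vdealb4w(HiVec, LoVec) cannot select this for a vector pair of i32. It is an illustration, not the HVX spec: vectors are scaled down to NW = 8 words each, the helper names (vpackl_ref, vdealb4w) are mine, and the vdealb4w lane order follows my reading of the instruction's pseudocode.

    #include <stdint.h>
    #include <stdio.h>

    #define NW 8   /* words per (scaled-down) HVX vector */

    /* Reference semantics of (VecI8 (vpackl HWI32:$Vs)): the low byte of
       every word of the pair, in element order: all of Lo's words, then
       all of Hi's.  Output lanes past 2*NW are undefined. */
    static void vpackl_ref(const uint32_t lo[NW], const uint32_t hi[NW],
                           uint8_t out[2 * NW]) {
        for (int i = 0; i < NW; i++) out[i]      = (uint8_t)lo[i];
        for (int i = 0; i < NW; i++) out[NW + i] = (uint8_t)hi[i];
    }

    /* vdealb4w(Vu, Vv): byte 0 of Vv's words, then byte 2 of Vv's words,
       then bytes 0 and 2 of Vu's words.  The byte-2 group lands between
       the two byte-0 groups, which is what breaks the pair case. */
    static void vdealb4w(const uint32_t vu[NW], const uint32_t vv[NW],
                         uint8_t vd[4 * NW]) {
        for (int i = 0; i < NW; i++) {
            vd[0 * NW + i] = (uint8_t)vv[i];
            vd[1 * NW + i] = (uint8_t)(vv[i] >> 16);
            vd[2 * NW + i] = (uint8_t)vu[i];
            vd[3 * NW + i] = (uint8_t)(vu[i] >> 16);
        }
    }

    int main(void) {
        uint32_t lo[NW], hi[NW];
        for (int i = 0; i < NW; i++) { lo[i] = i; hi[i] = NW + i; }

        uint8_t want[2 * NW], got[4 * NW];
        vpackl_ref(lo, hi, want);
        vdealb4w(hi, lo, got);   /* the old pattern: vdealb4w(HiVec, LoVec) */

        for (int i = 0; i < 2 * NW; i++)
            printf("lane %2d: want %2u, got %2u%s\n", i,
                   (unsigned)want[i], (unsigned)got[i],
                   want[i] == got[i] ? "" : "  <- wrong");
        return 0;
    }

Running this flags lanes 8..15: they receive byte 2 of Lo's words where vpackl wants byte 0 of Hi's words, so only the first quarter of the result is in vpackl order. With a single input vector (the HVI32 pattern in the hunk below, which keeps using vdealb4w with IMPLICIT_DEF), that first quarter is all vpackl defines, which is why the old pattern was only wrong for pairs.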

@@ -417,7 +417,8 @@ let Predicates = [UseHVX] in {
   def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>;
   def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>;
   def: Pat<(VecI8 (vpackl HWI16:$Vs)), (V6_vpackeb (HiVec $Vs), (LoVec $Vs))>;
-  def: Pat<(VecI8 (vpackl HWI32:$Vs)), (V6_vdealb4w (HiVec $Vs), (LoVec $Vs))>;
+  def: Pat<(VecI8 (vpackl HWI32:$Vs)),
+           (V6_vpackeb (IMPLICIT_DEF), (V6_vpackeh (HiVec $Vs), (LoVec $Vs)))>;
   def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>;
   def: Pat<(VecI16 (vunpack HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
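The replacement composes the two pack instructions. In the same scaled-down model, and with the same caveat that the lane orders are my reading of the HVX pseudocode: vpackeh keeps the low halfword of every word, Lo's first, which is exactly vpackl for HWI32 -> VecI16 (the unchanged pattern above); vpackeb then keeps the low byte of every halfword. The high operand of vpackeb only affects lanes that vpackl leaves undefined, which is why the new pattern can pass IMPLICIT_DEF there.

    #include <stdint.h>
    #include <stdio.h>

    #define NW 8   /* words per (scaled-down) HVX vector */

    /* vpackeh(Vu, Vv): the low (even) halfword of every word; Vv's
       halfwords fill the low half of the result, Vu's the high half. */
    static void vpackeh(const uint32_t vu[NW], const uint32_t vv[NW],
                        uint16_t vd[2 * NW]) {
        for (int i = 0; i < NW; i++) {
            vd[i]      = (uint16_t)vv[i];
            vd[NW + i] = (uint16_t)vu[i];
        }
    }

    /* vpackeb(Vu, Vv): the low (even) byte of every halfword, same layout. */
    static void vpackeb(const uint16_t vu[2 * NW], const uint16_t vv[2 * NW],
                        uint8_t vd[4 * NW]) {
        for (int i = 0; i < 2 * NW; i++) {
            vd[i]          = (uint8_t)vv[i];
            vd[2 * NW + i] = (uint8_t)vu[i];
        }
    }

    int main(void) {
        uint32_t lo[NW], hi[NW];
        uint16_t undef[2 * NW] = {0};   /* stands in for IMPLICIT_DEF */
        for (int i = 0; i < NW; i++) { lo[i] = i; hi[i] = NW + i; }

        uint16_t t[2 * NW];
        uint8_t  r[4 * NW];
        vpackeh(hi, lo, t);     /* (V6_vpackeh (HiVec $Vs), (LoVec $Vs)) */
        vpackeb(undef, t, r);   /* (V6_vpackeb (IMPLICIT_DEF), t)        */

        /* Defined lanes 0..2*NW-1 print as 0..15: the low byte of every
           input word, Lo's words first, matching the vpackl order. */
        for (int i = 0; i < 2 * NW; i++)
            printf("lane %2d: %u\n", i, (unsigned)r[i]);
        return 0;
    }

This is also the vpacke(.w) followed by vpacke(.h) instruction sequence that the updated tests below now check for.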

@@ -2,10 +2,11 @@
 ; This has a v32i8 = truncate v16i32 (64b mode), which was legalized to
 ; v64i8 = vpackl v32i32, for which there were no selection patterns provided.
-; Check that we generate vdeale for this.
+; Check that we generate vpackeh->vpackeb for this.
 ; CHECK-LABEL: fred:
-; CHECK: vdeale(v1.b,v0.b)
+; CHECK: v[[V0:[0-9]+]].h = vpacke(v1.w,v0.w)
+; CHECK: = vpacke({{.*}},v[[V0]].h)
 define void @fred(<32 x i8>* %a0, <32 x i32> %a1) #0 {
   %v0 = trunc <32 x i32> %a1 to <32 x i8>
   store <32 x i8> %v0, <32 x i8>* %a0, align 32

@@ -49,8 +49,9 @@ define void @f2(<64 x i16>* %a0, <64 x i8>* %a1) #0 {
 ; CHECK-DAG: v[[V0:[0-9]+]] = vmem(r0+#0)
 ; CHECK-DAG: v[[V1:[0-9]+]] = vmem(r0+#1)
 ; CHECK-DAG: q[[Q0:[0-3]]] = vsetq
-; CHECK: v[[V2:[0-9]+]].b = vdeale(v[[V1]].b,v[[V0]].b)
-; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V2]]
+; CHECK: v[[V2:[0-9]+]].h = vpacke(v[[V1]].w,v[[V0]].w)
+; CHECK: v[[V3:[0-9]+]].b = vpacke({{.*}},v[[V2]].h)
+; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V3]]
 define void @f3(<64 x i32>* %a0, <64 x i8>* %a1) #0 {
   %v0 = load <64 x i32>, <64 x i32>* %a0, align 128
   %v1 = trunc <64 x i32> %v0 to <64 x i8>