1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 03:53:04 +02:00

[Hexagon] Add patterns for sext_inreg of HVX vector types

llvm-svn: 323250
This commit is contained in:
Krzysztof Parzyszek 2018-01-23 19:56:16 +00:00
parent 2b414f4473
commit 43f3c72b7f
2 changed files with 73 additions and 0 deletions

View File

@ -3081,6 +3081,25 @@ let Predicates = [UseHVX] in {
// In-vector zero-extension from i8 elements to i32 elements, built by
// nesting VZxtb and VZxth and taking LoVec of each intermediate result.
// NOTE(review): VZxtb, VZxth and LoVec are defined outside this view;
// presumably each VZxt* step doubles the element width -- confirm.
def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
(LoVec (VZxth (LoVec (VZxtb $Vs))))>;
// The "source" types (the value-type operand of sext_inreg) are not legal,
// and there are no parameterized definitions for them, so the patterns are
// written out separately for each HVX vector length.
// 64-byte HVX mode. sext_inreg is selected as a shift left followed by an
// arithmetic shift right, both by (element width - source width) bits.
// The shift instructions must operate on the result's element width:
// halfword shifts (vaslh/vasrh) for VecI16, word shifts (vaslw/vasrw) for
// VecI32. Using halfword shifts on 32-bit lanes would shift each 16-bit
// half independently and produce wrong results.
let Predicates = [UseHVX,UseHVX64B] in {
  // i8 -> i16 in each halfword lane: shift by 16 - 8 = 8.
  def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v32i8)),
           (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
  // i8 -> i32 in each word lane: shift by 32 - 8 = 24.
  def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i8)),
           (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
  // i16 -> i32 in each word lane: shift by 32 - 16 = 16.
  def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i16)),
           (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
}
// 128-byte HVX mode. sext_inreg is selected as a shift left followed by an
// arithmetic shift right, both by (element width - source width) bits.
// The shift instructions must operate on the result's element width:
// halfword shifts (vaslh/vasrh) for VecI16, word shifts (vaslw/vasrw) for
// VecI32. Using halfword shifts on 32-bit lanes would shift each 16-bit
// half independently and produce wrong results.
let Predicates = [UseHVX,UseHVX128B] in {
  // i8 -> i16 in each halfword lane: shift by 16 - 8 = 8.
  def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v64i8)),
           (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
  // i8 -> i32 in each word lane: shift by 32 - 8 = 24.
  def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i8)),
           (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
  // i16 -> i32 in each word lane: shift by 32 - 16 = 16.
  def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i16)),
           (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
}
// Truncate the i16 elements of $Vss to i8 by feeding its HiVec and LoVec
// halves to V6_vpackeb.
// NOTE(review): presumably $Vss is a vector pair and vpackeb keeps the even
// (low) byte of every halfword -- confirm against the HVX manual.
def: Pat<(VecI8 (trunc HWI16:$Vss)),
(V6_vpackeb (HiVec $Vss), (LoVec $Vss))>;
def: Pat<(VecI16 (trunc HWI32:$Vss)),

View File

@ -0,0 +1,54 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that both functions compile successfully.
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon"
; CHECK-LABEL: danny:
; CHECK: vmem
; Reduced reproducer for the 64-byte HVX configuration: attribute group #0
; enables +hvx-length64b. All vector arithmetic is done on <8 x i32>.
; NOTE(review): presumably the shl/ashr-by-16 pair below is what reaches
; instruction selection as a sext_inreg node -- confirm with isel debug output.
define void @danny() #0 {
b0:
%v1 = load i16, i16* undef, align 2
%v2 = insertelement <8 x i16> undef, i16 %v1, i32 6
%v3 = insertelement <8 x i16> %v2, i16 undef, i32 7
%v4 = sext <8 x i16> %v3 to <8 x i32>
%v5 = mul <8 x i32> %v4, <i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410>
%v6 = add <8 x i32> %v5, <i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768>
%v7 = add <8 x i32> %v6, zeroinitializer
%v8 = ashr <8 x i32> %v7, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%v9 = add nsw <8 x i32> zeroinitializer, %v8
; Shift left then arithmetic-shift right by 16: sign-extends the low 16 bits
; of every i32 lane in place.
%v10 = shl <8 x i32> %v9, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%v11 = ashr exact <8 x i32> %v10, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%v12 = sub nsw <8 x i32> zeroinitializer, %v11
%v13 = trunc <8 x i32> %v12 to <8 x i16>
; Only the last lane is extracted and stored.
%v14 = extractelement <8 x i16> %v13, i32 7
store i16 %v14, i16* undef, align 2
unreachable
}
; CHECK-LABEL: sammy:
; CHECK: vmem
; Reduced reproducer for the 128-byte HVX configuration: attribute group #1
; enables +hvx-length128b. All vector arithmetic is done on <16 x i32>.
; NOTE(review): presumably the shl/ashr-by-16 pair below is what reaches
; instruction selection as a sext_inreg node -- confirm with isel debug output.
define void @sammy() #1 {
b0:
%v1 = load i16, i16* undef, align 2
%v2 = insertelement <16 x i16> undef, i16 %v1, i32 14
%v3 = insertelement <16 x i16> %v2, i16 undef, i32 15
%v4 = sext <16 x i16> %v3 to <16 x i32>
%v5 = mul <16 x i32> %v4, <i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410, i32 -36410>
%v6 = add <16 x i32> %v5, <i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768, i32 32768>
%v7 = add <16 x i32> %v6, zeroinitializer
%v8 = ashr <16 x i32> %v7, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%v9 = add nsw <16 x i32> zeroinitializer, %v8
; Shift left then arithmetic-shift right by 16: sign-extends the low 16 bits
; of every i32 lane in place.
%v10 = shl <16 x i32> %v9, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%v11 = ashr exact <16 x i32> %v10, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%v12 = sub nsw <16 x i32> zeroinitializer, %v11
%v13 = trunc <16 x i32> %v12 to <16 x i16>
; Only the last lane is extracted and stored.
%v14 = extractelement <16 x i16> %v13, i32 15
store i16 %v14, i16* undef, align 2
unreachable
}
attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }
attributes #1 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length128b,+hvxv60" }