diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index 6f1949aa149..f39dc7bbf57 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -3081,6 +3081,29 @@ let Predicates = [UseHVX] in {
   def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
            (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
 
+  // The "source" types are not legal, and there are no parameterized
+  // definitions for them, but they are length-specific.
+  // Each pattern sign-extends in place by shifting left and then
+  // arithmetically shifting right by (lane width - source width). The
+  // shifts must operate at the lane width of the result type: halfword
+  // shifts (vaslh/vasrh) for VecI16, word shifts (vaslw/vasrw) for VecI32.
+  let Predicates = [UseHVX,UseHVX64B] in {
+    def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v32i8)),
+             (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
+    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i8)),
+             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
+    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i16)),
+             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
+  }
+  let Predicates = [UseHVX,UseHVX128B] in {
+    def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v64i8)),
+             (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
+    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i8)),
+             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
+    def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i16)),
+             (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
+  }
+
   def: Pat<(VecI8 (trunc HWI16:$Vss)),
            (V6_vpackeb (HiVec $Vss), (LoVec $Vss))>;
   def: Pat<(VecI16 (trunc HWI32:$Vss)),
diff --git a/test/CodeGen/Hexagon/autohvx/isel-sext-inreg.ll b/test/CodeGen/Hexagon/autohvx/isel-sext-inreg.ll
new file mode 100644
index 00000000000..aee2a59af1a
--- /dev/null
+++ b/test/CodeGen/Hexagon/autohvx/isel-sext-inreg.ll
@@ -0,0 +1,58 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that both functions compile successfully.
+; NOTE(review): the constant vector operands of the mul/add/ashr/shl
+; instructions in both functions are missing in this copy of the patch (each
+; such line ends right after the comma). Restore them from the original
+; test before applying; they cannot be recovered from this text.
+
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+; CHECK-LABEL: danny:
+; CHECK: vmem
+define void @danny() #0 {
+b0:
+  %v1 = load i16, i16* undef, align 2
+  %v2 = insertelement <8 x i16> undef, i16 %v1, i32 6
+  %v3 = insertelement <8 x i16> %v2, i16 undef, i32 7
+  %v4 = sext <8 x i16> %v3 to <8 x i32>
+  %v5 = mul <8 x i32> %v4,
+  %v6 = add <8 x i32> %v5,
+  %v7 = add <8 x i32> %v6, zeroinitializer
+  %v8 = ashr <8 x i32> %v7,
+  %v9 = add nsw <8 x i32> zeroinitializer, %v8
+  %v10 = shl <8 x i32> %v9,
+  %v11 = ashr exact <8 x i32> %v10,
+  %v12 = sub nsw <8 x i32> zeroinitializer, %v11
+  %v13 = trunc <8 x i32> %v12 to <8 x i16>
+  %v14 = extractelement <8 x i16> %v13, i32 7
+  store i16 %v14, i16* undef, align 2
+  unreachable
+}
+
+; CHECK-LABEL: sammy:
+; CHECK: vmem
+define void @sammy() #1 {
+b0:
+  %v1 = load i16, i16* undef, align 2
+  %v2 = insertelement <16 x i16> undef, i16 %v1, i32 14
+  %v3 = insertelement <16 x i16> %v2, i16 undef, i32 15
+  %v4 = sext <16 x i16> %v3 to <16 x i32>
+  %v5 = mul <16 x i32> %v4,
+  %v6 = add <16 x i32> %v5,
+  %v7 = add <16 x i32> %v6, zeroinitializer
+  %v8 = ashr <16 x i32> %v7,
+  %v9 = add nsw <16 x i32> zeroinitializer, %v8
+  %v10 = shl <16 x i32> %v9,
+  %v11 = ashr exact <16 x i32> %v10,
+  %v12 = sub nsw <16 x i32> zeroinitializer, %v11
+  %v13 = trunc <16 x i32> %v12 to <16 x i16>
+  %v14 = extractelement <16 x i16> %v13, i32 15
+  store i16 %v14, i16* undef, align 2
+  unreachable
+}
+
+attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }
+attributes #1 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length128b,+hvxv60" }