1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00
llvm-mirror/test/CodeGen/Thumb2/mve-extractelt.ll
Simon Tatham 6aa69a8001 [ARM,MVE] Use VMOV.{S8,S16} for sign-extended extractelement.
MVE includes instructions that extract an 8- or 16-bit lane from a
vector and sign-extend it into the output 32-bit GPR. `ARMInstrMVE.td`
already included isel patterns to select those instructions in
response to the `ARMISD::VGETLANEs` selection-DAG node type. But
`ARMISD::VGETLANEs` was never actually generated, because the code
that creates it was conditioned on NEON only.

It's an easy fix to enable the same code for integer MVE, and now IR
that sign-extends the result of an extractelement (whether explicitly
or as part of the function call ABI) will use `vmov.s8` instead of
`vmov.u8` followed by `sxtb`.

Reviewers: SjoerdMeijer, dmgreen, ostannard

Subscribers: kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70132
2019-11-13 09:08:41 +00:00

87 lines
2.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; Unsigned 8-bit lane, extension spelled out in the IR: lane 10 of a
; <16 x i8> vector is extracted and widened to i32 with zext. The assertions
; below expect a vmov.u8 lane move immediately followed by the return.
define arm_aapcs_vfpcc i32 @u8_explicit_extend(<16 x i8> %a) {
; CHECK-LABEL: u8_explicit_extend:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u8 r0, q0[10]
; CHECK-NEXT: bx lr
entry:
  %lane = extractelement <16 x i8> %a, i32 10   ; pick byte lane 10
  %wide = zext i8 %lane to i32                  ; explicit zero-extension
  ret i32 %wide
}
; Signed 8-bit lane, extension spelled out in the IR: lane 10 of a
; <16 x i8> vector is extracted and widened to i32 with sext. The assertions
; below expect a vmov.s8 lane move immediately followed by the return, i.e.
; no separate sign-extension instruction in between.
define arm_aapcs_vfpcc i32 @s8_explicit_extend(<16 x i8> %a) {
; CHECK-LABEL: s8_explicit_extend:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.s8 r0, q0[10]
; CHECK-NEXT: bx lr
entry:
  %lane = extractelement <16 x i8> %a, i32 10   ; pick byte lane 10
  %wide = sext i8 %lane to i32                  ; explicit sign-extension
  ret i32 %wide
}
; 8-bit lane returned directly as i8, with no extension instruction in the
; IR. The return type here carries neither signext nor zeroext. The
; assertions below expect a vmov.u8 lane move immediately followed by the
; return.
define arm_aapcs_vfpcc i8 @u8_extend_via_pcs(<16 x i8> %a) {
; CHECK-LABEL: u8_extend_via_pcs:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u8 r0, q0[10]
; CHECK-NEXT: bx lr
entry:
  %lane = extractelement <16 x i8> %a, i32 10   ; pick byte lane 10
  ret i8 %lane
}
; 8-bit lane returned directly as i8, with no extension instruction in the
; IR; the signext attribute on the return type is the only thing requesting
; sign-extension. The assertions below expect a vmov.s8 lane move
; immediately followed by the return.
define arm_aapcs_vfpcc signext i8 @s8_extend_via_pcs(<16 x i8> %a) {
; CHECK-LABEL: s8_extend_via_pcs:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.s8 r0, q0[10]
; CHECK-NEXT: bx lr
entry:
  %lane = extractelement <16 x i8> %a, i32 10   ; pick byte lane 10
  ret i8 %lane
}
; Unsigned 16-bit lane, extension spelled out in the IR: lane 5 of an
; <8 x i16> vector is extracted and widened to i32 with zext. The assertions
; below expect a vmov.u16 lane move immediately followed by the return.
define arm_aapcs_vfpcc i32 @u16_explicit_extend(<8 x i16> %a) {
; CHECK-LABEL: u16_explicit_extend:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u16 r0, q0[5]
; CHECK-NEXT: bx lr
entry:
  %half = extractelement <8 x i16> %a, i32 5    ; pick halfword lane 5
  %word = zext i16 %half to i32                 ; explicit zero-extension
  ret i32 %word
}
; Signed 16-bit lane, extension spelled out in the IR: lane 5 of an
; <8 x i16> vector is extracted and widened to i32 with sext. The assertions
; below expect a vmov.s16 lane move immediately followed by the return, i.e.
; no separate sign-extension instruction in between.
define arm_aapcs_vfpcc i32 @s16_explicit_extend(<8 x i16> %a) {
; CHECK-LABEL: s16_explicit_extend:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.s16 r0, q0[5]
; CHECK-NEXT: bx lr
entry:
  %half = extractelement <8 x i16> %a, i32 5    ; pick halfword lane 5
  %word = sext i16 %half to i32                 ; explicit sign-extension
  ret i32 %word
}
; 16-bit lane returned directly as i16, with no extension instruction in the
; IR. The return type here carries neither signext nor zeroext. The
; assertions below expect a vmov.u16 lane move immediately followed by the
; return.
define arm_aapcs_vfpcc i16 @u16_extend_via_pcs(<8 x i16> %a) {
; CHECK-LABEL: u16_extend_via_pcs:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u16 r0, q0[5]
; CHECK-NEXT: bx lr
entry:
  %half = extractelement <8 x i16> %a, i32 5    ; pick halfword lane 5
  ret i16 %half
}
; 16-bit lane returned directly as i16, with no extension instruction in the
; IR; the signext attribute on the return type is the only thing requesting
; sign-extension. The assertions below expect a vmov.s16 lane move
; immediately followed by the return.
define arm_aapcs_vfpcc signext i16 @s16_extend_via_pcs(<8 x i16> %a) {
; CHECK-LABEL: s16_extend_via_pcs:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.s16 r0, q0[5]
; CHECK-NEXT: bx lr
entry:
  %half = extractelement <8 x i16> %a, i32 5    ; pick halfword lane 5
  ret i16 %half
}