1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[PowerPC] Legalize saturating vector add/sub

These intrinsics and the corresponding ISD nodes were recently added. PPC has
instructions that do this for vectors. Legalize them and add patterns to emit
the saturating instructions.

Differential revision: https://reviews.llvm.org/D71940
This commit is contained in:
Nemanja Ivanovic 2020-01-15 07:00:22 -06:00
parent 54ef354524
commit be1be6dd59
3 changed files with 155 additions and 0 deletions

View File

@ -567,6 +567,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
if (Subtarget.hasAltivec()) {
for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
}
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {

View File

@ -869,6 +869,20 @@ def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)),
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
(v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
// Saturating adds/subtracts.
// Map the generic saturating ISD nodes (saddsat/uaddsat/ssubsat/usubsat,
// marked Legal for these vector types in PPCISelLowering) directly onto the
// AltiVec saturating arithmetic instructions. Instruction naming scheme:
// V + {ADD,SUB} + {S,U} (signed/unsigned saturation) + {B,H,W}
// (byte/halfword/word element) + S (saturating).
def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
def : Pat<(v8i16 (saddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDSHS $vA, $vB))>;
def : Pat<(v8i16 (uaddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDUHS $vA, $vB))>;
def : Pat<(v4i32 (saddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDSWS $vA, $vB))>;
def : Pat<(v4i32 (uaddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDUWS $vA, $vB))>;
def : Pat<(v16i8 (ssubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBSBS $vA, $vB))>;
def : Pat<(v16i8 (usubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBUBS $vA, $vB))>;
def : Pat<(v8i16 (ssubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBSHS $vA, $vB))>;
def : Pat<(v8i16 (usubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBUHS $vA, $vB))>;
def : Pat<(v4i32 (ssubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBSWS $vA, $vB))>;
def : Pat<(v4i32 (usubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBUWS $vA, $vB))>;
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;

View File

@ -0,0 +1,135 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names \
; RUN: -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s
; v16i8 (byte-element) saturating arithmetic: each of the four intrinsics
; should lower to a single AltiVec instruction (no scalarization/expansion).

; Signed saturating add -> vaddsbs.
define dso_local <16 x i8 > @vectorsaddb(<16 x i8 > %a, <16 x i8 > %b) {
; CHECK-LABEL: vectorsaddb:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vaddsbs v2, v2, v3
; CHECK-NEXT: blr
entry:
%call = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %call
}
; Signed saturating subtract -> vsubsbs.
define dso_local <16 x i8 > @vectorssubb(<16 x i8 > %a, <16 x i8 > %b) {
; CHECK-LABEL: vectorssubb:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsubsbs v2, v2, v3
; CHECK-NEXT: blr
entry:
%call = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %call
}
; Unsigned saturating add -> vaddubs.
define dso_local <16 x i8 > @vectoruaddb(<16 x i8 > %a, <16 x i8 > %b) {
; CHECK-LABEL: vectoruaddb:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vaddubs v2, v2, v3
; CHECK-NEXT: blr
entry:
%call = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %call
}
; Unsigned saturating subtract -> vsububs.
define dso_local <16 x i8 > @vectorusubb(<16 x i8 > %a, <16 x i8 > %b) {
; CHECK-LABEL: vectorusubb:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsububs v2, v2, v3
; CHECK-NEXT: blr
entry:
%call = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %call
}
; v8i16 (halfword-element) saturating arithmetic: each intrinsic should
; lower to a single AltiVec instruction.

; Signed saturating add -> vaddshs.
define dso_local <8 x i16 > @vectorsaddh(<8 x i16 > %a, <8 x i16 > %b) {
; CHECK-LABEL: vectorsaddh:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vaddshs v2, v2, v3
; CHECK-NEXT: blr
entry:
%call = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %call
}
; Signed saturating subtract -> vsubshs.
define dso_local <8 x i16 > @vectorssubh(<8 x i16 > %a, <8 x i16 > %b) {
; CHECK-LABEL: vectorssubh:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsubshs v2, v2, v3
; CHECK-NEXT: blr
entry:
%call = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %call
}
; Unsigned saturating add -> vadduhs.
define dso_local <8 x i16 > @vectoruaddh(<8 x i16 > %a, <8 x i16 > %b) {
; CHECK-LABEL: vectoruaddh:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vadduhs v2, v2, v3
; CHECK-NEXT: blr
entry:
%call = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %call
}
; Unsigned saturating subtract -> vsubuhs.
define dso_local <8 x i16 > @vectorusubh(<8 x i16 > %a, <8 x i16 > %b) {
; CHECK-LABEL: vectorusubh:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsubuhs v2, v2, v3
; CHECK-NEXT: blr
entry:
%call = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %call
}
; v4i32 (word-element) saturating arithmetic: each intrinsic should lower
; to a single AltiVec instruction.

; Signed saturating add -> vaddsws.
define dso_local <4 x i32 > @vectorsaddw(<4 x i32 > %a, <4 x i32 > %b) {
; CHECK-LABEL: vectorsaddw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vaddsws v2, v2, v3
; CHECK-NEXT: blr
entry:
%call = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %call
}
; Signed saturating subtract -> vsubsws.
define dso_local <4 x i32 > @vectorssubw(<4 x i32 > %a, <4 x i32 > %b) {
; CHECK-LABEL: vectorssubw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsubsws v2, v2, v3
; CHECK-NEXT: blr
entry:
%call = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %call
}
; Unsigned saturating add -> vadduws.
define dso_local <4 x i32 > @vectoruaddw(<4 x i32 > %a, <4 x i32 > %b) {
; CHECK-LABEL: vectoruaddw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vadduws v2, v2, v3
; CHECK-NEXT: blr
entry:
%call = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %call
}
; Unsigned saturating subtract -> vsubuws.
define dso_local <4 x i32 > @vectorusubw(<4 x i32 > %a, <4 x i32 > %b) {
; CHECK-LABEL: vectorusubw:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsubuws v2, v2, v3
; CHECK-NEXT: blr
entry:
%call = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %call
}
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)