mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[PowerPC] Legalize saturating vector add/sub
These intrinsics and the corresponding ISD nodes were recently added. PPC has instructions that do this for vectors. Legalize them and add patterns to emit the saturating instructions. Differential revision: https://reviews.llvm.org/D71940
This commit is contained in:
parent
54ef354524
commit
be1be6dd59
@ -567,6 +567,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
||||
}
|
||||
|
||||
if (Subtarget.hasAltivec()) {
|
||||
for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
|
||||
setOperationAction(ISD::SADDSAT, VT, Legal);
|
||||
setOperationAction(ISD::SSUBSAT, VT, Legal);
|
||||
setOperationAction(ISD::UADDSAT, VT, Legal);
|
||||
setOperationAction(ISD::USUBSAT, VT, Legal);
|
||||
}
|
||||
// First set operation action for all vector types to expand. Then we
|
||||
// will selectively turn on ones that can be effectively codegen'd.
|
||||
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
|
||||
|
@ -869,6 +869,20 @@ def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)),
|
||||
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
|
||||
(v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
|
||||
|
||||
// Saturating adds/subtracts.
// Select the saddsat / uaddsat / ssubsat / usubsat nodes on v16i8, v8i16 and
// v4i32 directly to a single vector instruction each: the VADD*S family for
// the adds and the VSUB*S family for the subtracts. Every pattern passes the
// two source operands through unchanged and in the same order ($vA, $vB).
|
||||
def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
|
||||
def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
|
||||
def : Pat<(v8i16 (saddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDSHS $vA, $vB))>;
|
||||
def : Pat<(v8i16 (uaddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDUHS $vA, $vB))>;
|
||||
def : Pat<(v4i32 (saddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDSWS $vA, $vB))>;
|
||||
def : Pat<(v4i32 (uaddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDUWS $vA, $vB))>;
|
||||
def : Pat<(v16i8 (ssubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBSBS $vA, $vB))>;
|
||||
def : Pat<(v16i8 (usubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBUBS $vA, $vB))>;
|
||||
def : Pat<(v8i16 (ssubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBSHS $vA, $vB))>;
|
||||
def : Pat<(v8i16 (usubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBUHS $vA, $vB))>;
|
||||
def : Pat<(v4i32 (ssubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBSWS $vA, $vB))>;
|
||||
def : Pat<(v4i32 (usubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBUWS $vA, $vB))>;
|
||||
|
||||
// Loads.
|
||||
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
|
||||
|
||||
|
135
test/CodeGen/PowerPC/saturating-intrinsics.ll
Normal file
135
test/CodeGen/PowerPC/saturating-intrinsics.ll
Normal file
@ -0,0 +1,135 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -O3 -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names \
|
||||
; RUN: -verify-machineinstrs -mcpu=pwr7 < %s | FileCheck %s
|
||||
define dso_local <16 x i8 > @vectorsaddb(<16 x i8 > %a, <16 x i8 > %b) {
|
||||
; CHECK-LABEL: vectorsaddb:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vaddsbs v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%call = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
|
||||
ret <16 x i8> %call
|
||||
}
|
||||
|
||||
define dso_local <16 x i8 > @vectorssubb(<16 x i8 > %a, <16 x i8 > %b) {
|
||||
; CHECK-LABEL: vectorssubb:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vsubsbs v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%call = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
|
||||
ret <16 x i8> %call
|
||||
}
|
||||
|
||||
define dso_local <16 x i8 > @vectoruaddb(<16 x i8 > %a, <16 x i8 > %b) {
|
||||
; CHECK-LABEL: vectoruaddb:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vaddubs v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%call = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
|
||||
ret <16 x i8> %call
|
||||
}
|
||||
|
||||
define dso_local <16 x i8 > @vectorusubb(<16 x i8 > %a, <16 x i8 > %b) {
|
||||
; CHECK-LABEL: vectorusubb:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vsububs v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%call = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
|
||||
ret <16 x i8> %call
|
||||
}
|
||||
|
||||
define dso_local <8 x i16 > @vectorsaddh(<8 x i16 > %a, <8 x i16 > %b) {
|
||||
; CHECK-LABEL: vectorsaddh:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vaddshs v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%call = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
|
||||
ret <8 x i16> %call
|
||||
}
|
||||
|
||||
define dso_local <8 x i16 > @vectorssubh(<8 x i16 > %a, <8 x i16 > %b) {
|
||||
; CHECK-LABEL: vectorssubh:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vsubshs v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%call = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
|
||||
ret <8 x i16> %call
|
||||
}
|
||||
|
||||
define dso_local <8 x i16 > @vectoruaddh(<8 x i16 > %a, <8 x i16 > %b) {
|
||||
; CHECK-LABEL: vectoruaddh:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vadduhs v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%call = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
|
||||
ret <8 x i16> %call
|
||||
}
|
||||
|
||||
define dso_local <8 x i16 > @vectorusubh(<8 x i16 > %a, <8 x i16 > %b) {
|
||||
; CHECK-LABEL: vectorusubh:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vsubuhs v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%call = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
|
||||
ret <8 x i16> %call
|
||||
}
|
||||
|
||||
define dso_local <4 x i32 > @vectorsaddw(<4 x i32 > %a, <4 x i32 > %b) {
|
||||
; CHECK-LABEL: vectorsaddw:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vaddsws v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%call = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
|
||||
ret <4 x i32> %call
|
||||
}
|
||||
|
||||
define dso_local <4 x i32 > @vectorssubw(<4 x i32 > %a, <4 x i32 > %b) {
|
||||
; CHECK-LABEL: vectorssubw:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vsubsws v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%call = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
|
||||
ret <4 x i32> %call
|
||||
}
|
||||
|
||||
define dso_local <4 x i32 > @vectoruaddw(<4 x i32 > %a, <4 x i32 > %b) {
|
||||
; CHECK-LABEL: vectoruaddw:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vadduws v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%call = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
|
||||
ret <4 x i32> %call
|
||||
}
|
||||
|
||||
define dso_local <4 x i32 > @vectorusubw(<4 x i32 > %a, <4 x i32 > %b) {
|
||||
; CHECK-LABEL: vectorusubw:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vsubuws v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%call = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
|
||||
ret <4 x i32> %call
|
||||
}
|
||||
|
||||
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
|
||||
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
|
||||
declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
|
Loading…
x
Reference in New Issue
Block a user