1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[X86][AVX512] Move v2i64/v4i64 VPABS lowering to tablegen

Extend NoVLX targets to use the 512-bit versions

llvm-svn: 302359
This commit is contained in:
Simon Pilgrim 2017-05-06 19:11:59 +00:00
parent a125ccfde1
commit f17aa562a5
3 changed files with 44 additions and 14 deletions

View File

@ -1240,8 +1240,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
if (Subtarget.hasVLX()) {
setOperationAction(ISD::ABS, MVT::v4i64, Legal);
setOperationAction(ISD::ABS, MVT::v2i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
@ -1308,6 +1306,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
// On sub-targets without VLX, 128/256-bit vector ABS is widened to use the 512-bit instruction.
setOperationAction(ISD::ABS, MVT::v4i64, Legal);
setOperationAction(ISD::ABS, MVT::v2i64, Legal);
for (auto VT : { MVT::v8i1, MVT::v16i1 }) {
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);

View File

@ -8631,6 +8631,20 @@ multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>;
// VPABS without VLX: select 128-bit and 256-bit 64-bit-element `abs` by
// widening to the 512-bit instruction. These patterns are only active when
// both the HasAVX512 and NoVLX predicates hold.
//
// Each pattern follows the same three steps:
//   1. INSERT_SUBREG places the narrow source into the low part of an
//      otherwise undefined (IMPLICIT_DEF) v8i64 value.
//   2. VPABSQZrr computes the absolute value on the whole v8i64 value.
//   3. EXTRACT_SUBREG takes back only the low part, so whatever was computed
//      from the undefined upper elements is never observed.
let Predicates = [HasAVX512, NoVLX] in {
// v4i64: widen through the sub_ymm sub-register index and narrow back through it.
def : Pat<(v4i64 (abs VR256X:$src)),
(EXTRACT_SUBREG
(VPABSQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
sub_ymm)>;
// v2i64: same scheme, using the sub_xmm sub-register index.
def : Pat<(v2i64 (abs VR128X:$src)),
(EXTRACT_SUBREG
(VPABSQZrr
(INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
sub_xmm)>;
}
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>;

View File

@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL
; fold (abs c1) -> c2
define <4 x i32> @combine_v4i32_abs_constant() {
@ -46,17 +48,29 @@ define <32 x i8> @combine_v32i8_abs_abs(<32 x i8> %a) {
}
define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) {
; CHECK-LABEL: combine_v4i64_abs_abs:
; CHECK: # BB#0:
; CHECK-NEXT: vpsrad $31, %ymm0, %ymm1
; CHECK-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpsrad $31, %ymm0, %ymm1
; CHECK-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
; AVX2-LABEL: combine_v4i64_abs_abs:
; AVX2: # BB#0:
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: combine_v4i64_abs_abs:
; AVX512F: # BB#0:
; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT: vpabsq %zmm0, %zmm0
; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: combine_v4i64_abs_abs:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vpabsq %ymm0, %ymm0
; AVX512VL-NEXT: retq
%n1 = sub <4 x i64> zeroinitializer, %a
%b1 = icmp slt <4 x i64> %a, zeroinitializer
%a1 = select <4 x i1> %b1, <4 x i64> %n1, <4 x i64> %a