1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

AVX-512: Implemented CMOV for 512-bit vectors

llvm-svn: 193747
This commit is contained in:
Elena Demikhovsky 2013-10-31 13:15:32 +00:00
parent ae9a008dd7
commit 1c867680b8
3 changed files with 46 additions and 2 deletions

View File

@ -15750,6 +15750,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::CMOV_V8F32:
case X86::CMOV_V4F64:
case X86::CMOV_V4I64:
case X86::CMOV_V16F32:
case X86::CMOV_V8F64:
case X86::CMOV_V8I64:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
@ -16633,8 +16636,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
if (Subtarget->hasAVX512() && VT.isVector() &&
Cond.getValueType().getVectorElementType() == MVT::i1) {
EVT CondVT = Cond.getValueType();
if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
CondVT.getVectorElementType() == MVT::i1) {
// v16i8 (select v16i1, v16i8, v16i8) does not have a proper
// lowering on AVX-512. In this case we convert it to
// v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.

View File

@ -884,6 +884,24 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
[(set VR256:$dst,
(v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
EFLAGS)))]>;
// Pseudo conditional-move for a 512-bit vector register holding v8i64.
// Operands: $t and $f are the two VR512 candidate values, $cond is an 8-bit
// immediate condition code; the result is written to the VR512 $dst.
// The selection pattern matches an X86cmov node of type v8i64 that also
// consumes EFLAGS. The asm string is only a placeholder ("PSEUDO!").
// NOTE(review): the hunk header shows this def sits inside a
// `let Uses = [EFLAGS], usesCustomInserter = 1` scope, so it is presumably
// expanded by X86TargetLowering::EmitInstrWithCustomInserter -- confirm.
def CMOV_V8I64 : I<0, Pseudo,
(outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
"#CMOV_V8I64 PSEUDO!",
[(set VR512:$dst,
(v8i64 (X86cmov VR512:$t, VR512:$f, imm:$cond,
EFLAGS)))]>;
// Pseudo conditional-move for a 512-bit vector register holding v8f64.
// Operands: $t and $f are the two VR512 candidate values, $cond is an 8-bit
// immediate condition code; the result is written to the VR512 $dst.
// The selection pattern matches an X86cmov node of type v8f64 that also
// consumes EFLAGS. The asm string is only a placeholder ("PSEUDO!").
// NOTE(review): the hunk header shows this def sits inside a
// `let Uses = [EFLAGS], usesCustomInserter = 1` scope, so it is presumably
// expanded by X86TargetLowering::EmitInstrWithCustomInserter -- confirm.
def CMOV_V8F64 : I<0, Pseudo,
(outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
"#CMOV_V8F64 PSEUDO!",
[(set VR512:$dst,
(v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond,
EFLAGS)))]>;
// Pseudo conditional-move for a 512-bit vector register holding v16f32.
// Operands: $t and $f are the two VR512 candidate values, $cond is an 8-bit
// immediate condition code; the result is written to the VR512 $dst.
// The selection pattern matches an X86cmov node of type v16f32 that also
// consumes EFLAGS. The asm string is only a placeholder ("PSEUDO!").
// NOTE(review): the hunk header shows this def sits inside a
// `let Uses = [EFLAGS], usesCustomInserter = 1` scope, so it is presumably
// expanded by X86TargetLowering::EmitInstrWithCustomInserter -- confirm.
def CMOV_V16F32 : I<0, Pseudo,
(outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
"#CMOV_V16F32 PSEUDO!",
[(set VR512:$dst,
(v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond,
EFLAGS)))]>;
}

View File

@ -0,0 +1,22 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; select00: select between an all-zero <16 x i32> and %b using a single scalar
; i1 condition (%a == 255), then xor the selected vector with %b.
; The expectations below require a vmovaps that is directly followed by a line
; containing "LBB" in the generated code for this function.
; NOTE(review): presumably this covers the 512-bit CMOV pseudo expansion into a
; branch around a vector move -- confirm against the lowering code.
; CHECK-LABEL: select00
; CHECK: vmovaps
; CHECK-NEXT: LBB
define <16 x i32> @select00(i32 %a, <16 x i32> %b) nounwind {
; Scalar condition: true when %a equals 255.
%cmpres = icmp eq i32 %a, 255
; Whole-vector select driven by the scalar i1 (not a per-lane mask).
%selres = select i1 %cmpres, <16 x i32> zeroinitializer, <16 x i32> %b
; Combine with %b so the selected value is actually used by the result.
%res = xor <16 x i32> %b, %selres
ret <16 x i32> %res
}
; select01: same shape as the <16 x i32> case, but for <8 x i64>: select between
; an all-zero vector and %b using a single scalar i1 condition (%a == 255),
; then xor the selected vector with %b.
; The expectations below require a vmovaps that is directly followed by a line
; containing "LBB" in the generated code for this function.
; NOTE(review): presumably this exercises the CMOV_V8I64 pseudo -- confirm.
; CHECK-LABEL: select01
; CHECK: vmovaps
; CHECK-NEXT: LBB
define <8 x i64> @select01(i32 %a, <8 x i64> %b) nounwind {
; Scalar condition: true when %a equals 255.
%cmpres = icmp eq i32 %a, 255
; Whole-vector select driven by the scalar i1 (not a per-lane mask).
%selres = select i1 %cmpres, <8 x i64> zeroinitializer, <8 x i64> %b
; Combine with %b so the selected value is actually used by the result.
%res = xor <8 x i64> %b, %selres
ret <8 x i64> %res
}