1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[X86] Limit the 'x' inline assembly constraint to zmm0-15 when used for a 512 type.

The 'v' constraint is used to select zmm0-31. This makes 512 bit consistent with 128/256-bit.a

llvm-svn: 358450
This commit is contained in:
Craig Topper 2019-04-15 21:06:32 +00:00
parent 74fb88a73a
commit 08eee5612e
4 changed files with 18 additions and 1 deletions

View File

@ -43730,7 +43730,9 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v16f32:
case MVT::v16i32:
case MVT::v8i64:
return std::make_pair(0U, &X86::VR512RegClass);
if (VConstraint)
return std::make_pair(0U, &X86::VR512RegClass);
return std::make_pair(0U, &X86::VR512_0_15RegClass);
}
break;
}

View File

@ -163,6 +163,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
case X86::RFP32RegClassID:
case X86::RFP64RegClassID:
case X86::RFP80RegClassID:
case X86::VR512_0_15RegClassID:
case X86::VR512RegClassID:
// Don't return a super-class that would shrink the spill size.
// That can happen with the vector and float classes.

View File

@ -570,6 +570,10 @@ def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> {
def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
512, (sequence "ZMM%u", 0, 31)>;
// Represents the lower 16 registers that have VEX/legacy encodable subregs.
def VR512_0_15 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
512, (sequence "ZMM%u", 0, 15)>;
// Scalar AVX-512 floating point registers.
def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;

View File

@ -0,0 +1,10 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -stop-after=expand-isel-pseudos | FileCheck %s
; CHECK: %[[REG1:.*]]:vr512_0_15 = COPY %1
; CHECK: %[[REG2:.*]]:vr512_0_15 = COPY %2
; CHECK: INLINEASM &"vpaddq\09$3, $2, $0 {$1}", 0, 7340042, def %{{.*}}, 1179657, %{{.*}}, 7340041, %[[REG1]], 7340041, %[[REG2]], 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags
define <8 x i64> @mask_Yk_i8(i8 signext %msk, <8 x i64> %x, <8 x i64> %y) {
entry:
%0 = tail call <8 x i64> asm "vpaddq\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i8 %msk, <8 x i64> %x, <8 x i64> %y)
ret <8 x i64> %0
}