mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[X86] Allow the Yz inline assembly constraint to choose ymm0 or zmm0 when AVX/AVX512 is enabled and the type is 256 or 512 bits
GCC supports selecting ymm0/zmm0 for the Yz constraint when used with 256- or 512-bit vector types. Fixes PR45806. Differential Revision: https://reviews.llvm.org/D79448
This commit is contained in:
parent
8c3e5ac746
commit
e48ee635ba
@ -48046,7 +48046,9 @@ TargetLowering::ConstraintWeight
|
||||
// XMM0
|
||||
case 'z':
|
||||
case '0':
|
||||
if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1())
|
||||
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
|
||||
((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()) ||
|
||||
((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()))
|
||||
return CW_SpecificReg;
|
||||
return CW_Invalid;
|
||||
// Conditional OpMask regs (AVX512)
|
||||
@ -48496,6 +48498,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||
if (Subtarget.hasAVX())
|
||||
return std::make_pair(0U, &X86::VR256RegClass);
|
||||
break;
|
||||
case MVT::v64i8:
|
||||
case MVT::v32i16:
|
||||
case MVT::v8f64:
|
||||
case MVT::v16f32:
|
||||
case MVT::v16i32:
|
||||
@ -48521,7 +48525,42 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||
case 'z':
|
||||
case '0':
|
||||
if (!Subtarget.hasSSE1()) break;
|
||||
return std::make_pair(X86::XMM0, &X86::VR128RegClass);
|
||||
switch (VT.SimpleTy) {
|
||||
default: break;
|
||||
// Scalar SSE types.
|
||||
case MVT::f32:
|
||||
case MVT::i32:
|
||||
return std::make_pair(X86::XMM0, &X86::FR32RegClass);
|
||||
case MVT::f64:
|
||||
case MVT::i64:
|
||||
return std::make_pair(X86::XMM0, &X86::FR64RegClass);
|
||||
case MVT::f128:
|
||||
case MVT::v16i8:
|
||||
case MVT::v8i16:
|
||||
case MVT::v4i32:
|
||||
case MVT::v2i64:
|
||||
case MVT::v4f32:
|
||||
case MVT::v2f64:
|
||||
return std::make_pair(X86::XMM0, &X86::VR128RegClass);
|
||||
// AVX types.
|
||||
case MVT::v32i8:
|
||||
case MVT::v16i16:
|
||||
case MVT::v8i32:
|
||||
case MVT::v4i64:
|
||||
case MVT::v8f32:
|
||||
case MVT::v4f64:
|
||||
if (Subtarget.hasAVX())
|
||||
return std::make_pair(X86::YMM0, &X86::VR256RegClass);
|
||||
break;
|
||||
case MVT::v8f64:
|
||||
case MVT::v16f32:
|
||||
case MVT::v16i32:
|
||||
case MVT::v8i64:
|
||||
if (Subtarget.hasAVX512())
|
||||
return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 'k':
|
||||
// This register class doesn't allocate k0 for masked vector operation.
|
||||
if (Subtarget.hasAVX512()) {
|
||||
|
@ -134,3 +134,13 @@ entry:
|
||||
ret <8 x float> %0
|
||||
}
|
||||
|
||||
define <8 x float> @testYMM0() {
|
||||
; CHECK: vpcmpeqd %ymm0, %ymm0, %ymm0
|
||||
entry:
|
||||
%ymm0 = alloca <8 x float>, align 32
|
||||
%0 = call <8 x float> asm "vpcmpeqd $0, $0, $0", "=^Yz,~{dirflag},~{fpsr},~{flags}"()
|
||||
store <8 x float> %0, <8 x float>* %ymm0, align 32
|
||||
%1 = load <8 x float>, <8 x float>* %ymm0, align 32
|
||||
ret <8 x float> %1
|
||||
}
|
||||
|
||||
|
@ -70,3 +70,12 @@ entry:
|
||||
ret <16 x float> %0
|
||||
}
|
||||
|
||||
define <16 x float> @testZMM0() {
|
||||
entry:
|
||||
; CHECK: vpternlogd $255, %zmm0, %zmm0, %zmm0
|
||||
%zmm0 = alloca <16 x float>, align 64
|
||||
%0 = call <16 x float> asm "vpternlogd $$255, $0, $0, $0", "=^Yz,~{dirflag},~{fpsr},~{flags}"()
|
||||
store <16 x float> %0, <16 x float>* %zmm0, align 64
|
||||
%1 = load <16 x float>, <16 x float>* %zmm0, align 64
|
||||
ret <16 x float> %1
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user