mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[X86][F16C] Remove cvtph2ps intrinsics and use generic half2float conversion (PR37554)
This removes everything but int_x86_avx512_mask_vcvtph2ps_512 which provides the SAE variant, but even this can use the fpext generic if the rounding control is the default. Differential Revision: https://reviews.llvm.org/D75162
This commit is contained in:
parent
e70edfb8de
commit
6019f1db6d
@ -2546,26 +2546,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
// Half float conversion
|
||||
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
|
||||
def int_x86_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
|
||||
def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, ImmArg<1>]>;
|
||||
def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, ImmArg<1>]>;
|
||||
def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">,
|
||||
def int_x86_avx512_mask_vcvtph2ps_512 :
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty,
|
||||
llvm_i16_ty, llvm_i32_ty],
|
||||
[IntrNoMem, ImmArg<3>]>;
|
||||
def int_x86_avx512_mask_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256_mask">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty, llvm_v8f32_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps_mask">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty, llvm_v4f32_ty,
|
||||
llvm_i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty,
|
||||
llvm_v16i16_ty, llvm_i16_ty],
|
||||
|
@ -204,6 +204,8 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
||||
Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
|
||||
Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
|
||||
Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
|
||||
Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
|
||||
Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
|
||||
Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
|
||||
Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
|
||||
Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
|
||||
@ -316,6 +318,7 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
|
||||
Name == "avx.cvtdq2.pd.256" || // Added in 3.9
|
||||
Name == "avx.cvtdq2.ps.256" || // Added in 7.0
|
||||
Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
|
||||
Name.startswith("vcvtph2ps.") || // Added in 11.0
|
||||
Name.startswith("avx.vinsertf128.") || // Added in 3.7
|
||||
Name == "avx2.vinserti128" || // Added in 3.7
|
||||
Name.startswith("avx512.mask.insert") || // Added in 4.0
|
||||
@ -2132,6 +2135,23 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
: Builder.CreateSIToFP(Rep, DstTy, "cvt");
|
||||
}
|
||||
|
||||
if (CI->getNumArgOperands() >= 3)
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
|
||||
CI->getArgOperand(1));
|
||||
} else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
|
||||
Name.startswith("vcvtph2ps."))) {
|
||||
Type *DstTy = CI->getType();
|
||||
Rep = CI->getArgOperand(0);
|
||||
Type *SrcTy = Rep->getType();
|
||||
unsigned NumDstElts = DstTy->getVectorNumElements();
|
||||
if (NumDstElts != SrcTy->getVectorNumElements()) {
|
||||
assert(NumDstElts == 4 && "Unexpected vector size");
|
||||
uint32_t ShuffleMask[4] = {0, 1, 2, 3};
|
||||
Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
|
||||
}
|
||||
Rep = Builder.CreateBitCast(
|
||||
Rep, VectorType::get(Type::getHalfTy(C), NumDstElts));
|
||||
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
|
||||
if (CI->getNumArgOperands() >= 3)
|
||||
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
|
||||
CI->getArgOperand(1));
|
||||
|
@ -783,10 +783,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86ISD::FSUBS, X86ISD::FSUBS_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK,
|
||||
X86ISD::FSUBS, X86ISD::FSUBS_RND),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK,
|
||||
X86ISD::CVTPH2PS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK,
|
||||
X86ISD::CVTPH2PS, 0),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_SAE,
|
||||
X86ISD::CVTPH2PS, X86ISD::CVTPH2PS_SAE),
|
||||
X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, CVTPS2PH_MASK,
|
||||
@ -1108,8 +1104,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(subborrow_64, ADX, X86ISD::SBB, X86ISD::SUB),
|
||||
X86_INTRINSIC_DATA(tbm_bextri_u32, BEXTRI, X86ISD::BEXTR, 0),
|
||||
X86_INTRINSIC_DATA(tbm_bextri_u64, BEXTRI, X86ISD::BEXTR, 0),
|
||||
X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
|
||||
X86_INTRINSIC_DATA(vcvtph2ps_256, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
|
||||
X86_INTRINSIC_DATA(vcvtps2ph_128, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
|
||||
X86_INTRINSIC_DATA(vcvtps2ph_256, INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),
|
||||
|
||||
|
@ -2543,50 +2543,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
}
|
||||
break;
|
||||
|
||||
case Intrinsic::x86_vcvtph2ps_128:
|
||||
case Intrinsic::x86_vcvtph2ps_256: {
|
||||
auto Arg = II->getArgOperand(0);
|
||||
auto ArgType = cast<VectorType>(Arg->getType());
|
||||
auto RetType = cast<VectorType>(II->getType());
|
||||
unsigned ArgWidth = ArgType->getNumElements();
|
||||
unsigned RetWidth = RetType->getNumElements();
|
||||
assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
|
||||
assert(ArgType->isIntOrIntVectorTy() &&
|
||||
ArgType->getScalarSizeInBits() == 16 &&
|
||||
"CVTPH2PS input type should be 16-bit integer vector");
|
||||
assert(RetType->getScalarType()->isFloatTy() &&
|
||||
"CVTPH2PS output type should be 32-bit float vector");
|
||||
|
||||
// Constant folding: Convert to generic half to single conversion.
|
||||
if (isa<ConstantAggregateZero>(Arg))
|
||||
return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
|
||||
|
||||
if (isa<ConstantDataVector>(Arg)) {
|
||||
auto VectorHalfAsShorts = Arg;
|
||||
if (RetWidth < ArgWidth) {
|
||||
SmallVector<uint32_t, 8> SubVecMask;
|
||||
for (unsigned i = 0; i != RetWidth; ++i)
|
||||
SubVecMask.push_back((int)i);
|
||||
VectorHalfAsShorts = Builder.CreateShuffleVector(
|
||||
Arg, UndefValue::get(ArgType), SubVecMask);
|
||||
}
|
||||
|
||||
auto VectorHalfType =
|
||||
VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
|
||||
auto VectorHalfs =
|
||||
Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType);
|
||||
auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType);
|
||||
return replaceInstUsesWith(*II, VectorFloats);
|
||||
}
|
||||
|
||||
// We only use the lowest lanes of the argument.
|
||||
if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
|
||||
II->setArgOperand(0, V);
|
||||
return II;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::x86_sse_cvtss2si:
|
||||
case Intrinsic::x86_sse_cvtss2si64:
|
||||
case Intrinsic::x86_sse_cvttss2si:
|
||||
|
@ -1907,6 +1907,62 @@ entry:
|
||||
ret <4 x float> %vecins.i
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_cvtph_ps(<4 x i64> %__A) {
|
||||
; CHECK-LABEL: test_mm512_cvtph_ps:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
entry:
|
||||
%0 = bitcast <4 x i64> %__A to <16 x i16>
|
||||
%1 = bitcast <16 x i16> %0 to <16 x half>
|
||||
%2 = fpext <16 x half> %1 to <16 x float>
|
||||
ret <16 x float> %2
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_mask_cvtph_ps(<16 x float> %__W, i16 zeroext %__U, <4 x i64> %__A) {
|
||||
; X86-LABEL: test_mm512_mask_cvtph_ps:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvtph2ps %ymm1, %zmm0 {%k1}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm512_mask_cvtph_ps:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvtph2ps %ymm1, %zmm0 {%k1}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <4 x i64> %__A to <16 x i16>
|
||||
%1 = bitcast <16 x i16> %0 to <16 x half>
|
||||
%2 = bitcast i16 %__U to <16 x i1>
|
||||
%3 = fpext <16 x half> %1 to <16 x float>
|
||||
%4 = select <16 x i1> %2, <16 x float> %3, <16 x float> %__W
|
||||
ret <16 x float> %4
|
||||
}
|
||||
|
||||
define <16 x float> @test_mm512_maskz_cvtph_ps(i16 zeroext %__U, <4 x i64> %__A) {
|
||||
; X86-LABEL: test_mm512_maskz_cvtph_ps:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm512_maskz_cvtph_ps:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <4 x i64> %__A to <16 x i16>
|
||||
%1 = bitcast <16 x i16> %0 to <16 x half>
|
||||
%2 = bitcast i16 %__U to <16 x i1>
|
||||
%3 = fpext <16 x half> %1 to <16 x float>
|
||||
%4 = select <16 x i1> %2, <16 x float> %3, <16 x float> zeroinitializer
|
||||
ret <16 x float> %4
|
||||
}
|
||||
|
||||
define <8 x double> @test_mm512_cvtps_pd(<8 x float> %__A) {
|
||||
; CHECK-LABEL: test_mm512_cvtps_pd:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
|
@ -4512,6 +4512,76 @@ define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x
|
||||
ret <8 x double> %res2
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
|
||||
; CHECK-LABEL: test_x86_vcvtph2ps_512:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
|
||||
; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
|
||||
; CHECK: ## %bb.0:
|
||||
; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x18,0x13,0xc0]
|
||||
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_512_rrk:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x13,0xc8]
|
||||
; X86-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_512_rrk:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x13,0xc8]
|
||||
; X64-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x99,0x13,0xc0]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x99,0x13,0xc0]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_512_rrkz:
|
||||
; X86: ## %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x13,0xc0]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_512_rrkz:
|
||||
; X64: ## %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x13,0xc0]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
|
||||
|
||||
define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
|
||||
; CHECK-LABEL: test_valign_q:
|
||||
; CHECK: ## %bb.0:
|
||||
@ -4633,14 +4703,14 @@ define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
|
||||
; X86-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
|
||||
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
|
||||
; X86-NEXT: ## fixup A - offset: 6, value: LCPI211_0, kind: FK_Data_4
|
||||
; X86-NEXT: ## fixup A - offset: 6, value: LCPI216_0, kind: FK_Data_4
|
||||
; X86-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
|
||||
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
|
||||
; X86-NEXT: ## fixup A - offset: 6, value: LCPI211_1, kind: FK_Data_4
|
||||
; X86-NEXT: ## fixup A - offset: 6, value: LCPI216_1, kind: FK_Data_4
|
||||
; X86-NEXT: vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
|
||||
; X86-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
|
||||
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
|
||||
; X86-NEXT: ## fixup A - offset: 6, value: LCPI211_2, kind: FK_Data_4
|
||||
; X86-NEXT: ## fixup A - offset: 6, value: LCPI216_2, kind: FK_Data_4
|
||||
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
|
||||
; X86-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
@ -4649,14 +4719,14 @@ define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16
|
||||
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
|
||||
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
|
||||
; X64-NEXT: ## fixup A - offset: 6, value: LCPI211_0-4, kind: reloc_riprel_4byte
|
||||
; X64-NEXT: ## fixup A - offset: 6, value: LCPI216_0-4, kind: reloc_riprel_4byte
|
||||
; X64-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
|
||||
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
|
||||
; X64-NEXT: ## fixup A - offset: 6, value: LCPI211_1-4, kind: reloc_riprel_4byte
|
||||
; X64-NEXT: ## fixup A - offset: 6, value: LCPI216_1-4, kind: reloc_riprel_4byte
|
||||
; X64-NEXT: vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
|
||||
; X64-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
|
||||
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
|
||||
; X64-NEXT: ## fixup A - offset: 6, value: LCPI211_2-4, kind: reloc_riprel_4byte
|
||||
; X64-NEXT: ## fixup A - offset: 6, value: LCPI216_2-4, kind: reloc_riprel_4byte
|
||||
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
|
||||
; X64-NEXT: retq ## encoding: [0xc3]
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> %x2, i16 %x3)
|
||||
|
@ -1007,76 +1007,6 @@ define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) {
|
||||
}
|
||||
declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone
|
||||
|
||||
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
|
||||
; CHECK-LABEL: test_x86_vcvtph2ps_512:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
|
||||
; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
|
||||
; X64-LABEL: test_x86_vcvtph2ps_512_rrk:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1}
|
||||
; X64-NEXT: vmovaps %zmm1, %zmm0
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test_x86_vcvtph2ps_512_rrk:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X86-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1}
|
||||
; X86-NEXT: vmovaps %zmm1, %zmm0
|
||||
; X86-NEXT: retl
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
|
||||
; X64-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z}
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X86-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z}
|
||||
; X86-NEXT: retl
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
|
||||
; X64-LABEL: test_x86_vcvtph2ps_512_rrkz:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X86-LABEL: test_x86_vcvtph2ps_512_rrkz:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
|
||||
; X86-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z}
|
||||
; X86-NEXT: retl
|
||||
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly
|
||||
|
||||
define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16 %mask, <16 x i16> * %dst) {
|
||||
; X64-LABEL: test_x86_vcvtps2ph_256:
|
||||
; X64: # %bb.0:
|
||||
|
@ -365,6 +365,98 @@ entry:
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm_mask_cvtph_ps(<4 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) {
|
||||
; X86-LABEL: test_mm_mask_cvtph_ps:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvtph2ps %xmm1, %xmm0 {%k1}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_mask_cvtph_ps:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvtph2ps %xmm1, %xmm0 {%k1}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <2 x i64> %__A to <8 x i16>
|
||||
%1 = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%2 = bitcast <4 x i16> %1 to <4 x half>
|
||||
%3 = bitcast i8 %__U to <8 x i1>
|
||||
%4 = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%5 = fpext <4 x half> %2 to <4 x float>
|
||||
%6 = select <4 x i1> %4, <4 x float> %5, <4 x float> %__W
|
||||
ret <4 x float> %6
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm_maskz_cvtph_ps(i8 zeroext %__U, <2 x i64> %__A) {
|
||||
; X86-LABEL: test_mm_maskz_cvtph_ps:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvtph2ps %xmm0, %xmm0 {%k1} {z}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_maskz_cvtph_ps:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %xmm0 {%k1} {z}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <2 x i64> %__A to <8 x i16>
|
||||
%1 = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%2 = bitcast <4 x i16> %1 to <4 x half>
|
||||
%3 = bitcast i8 %__U to <8 x i1>
|
||||
%4 = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%5 = fpext <4 x half> %2 to <4 x float>
|
||||
%6 = select <4 x i1> %4, <4 x float> %5, <4 x float> zeroinitializer
|
||||
ret <4 x float> %6
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm256_mask_cvtph_ps(<8 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) {
|
||||
; X86-LABEL: test_mm256_mask_cvtph_ps:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvtph2ps %xmm1, %ymm0 {%k1}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm256_mask_cvtph_ps:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvtph2ps %xmm1, %ymm0 {%k1}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <2 x i64> %__A to <8 x i16>
|
||||
%1 = bitcast <8 x i16> %0 to <8 x half>
|
||||
%2 = bitcast i8 %__U to <8 x i1>
|
||||
%3 = fpext <8 x half> %1 to <8 x float>
|
||||
%4 = select <8 x i1> %2, <8 x float> %3, <8 x float> %__W
|
||||
ret <8 x float> %4
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm256_maskz_cvtph_ps(i8 zeroext %__U, <2 x i64> %__A) {
|
||||
; X86-LABEL: test_mm256_maskz_cvtph_ps:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X86-NEXT: kmovw %eax, %k1
|
||||
; X86-NEXT: vcvtph2ps %xmm0, %ymm0 {%k1} {z}
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm256_maskz_cvtph_ps:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: kmovw %edi, %k1
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %ymm0 {%k1} {z}
|
||||
; X64-NEXT: retq
|
||||
entry:
|
||||
%0 = bitcast <2 x i64> %__A to <8 x i16>
|
||||
%1 = bitcast <8 x i16> %0 to <8 x half>
|
||||
%2 = bitcast i8 %__U to <8 x i1>
|
||||
%3 = fpext <8 x half> %1 to <8 x float>
|
||||
%4 = select <8 x i1> %2, <8 x float> %3, <8 x float> zeroinitializer
|
||||
ret <8 x float> %4
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mm_mask_cvtps_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) {
|
||||
; X86-LABEL: test_mm_mask_cvtps_epi32:
|
||||
; X86: # %bb.0: # %entry
|
||||
|
@ -10247,6 +10247,100 @@ define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x fl
|
||||
ret <8 x float> %res2
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
|
||||
; CHECK-LABEL: test_x86_vcvtph2ps_128:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0,<4 x float> %a1, i8 %mask) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_128_rrk:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvtph2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8]
|
||||
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_128_rrk:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8]
|
||||
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_128_rrkz:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvtph2ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x13,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_128_rrkz:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x13,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readonly
|
||||
|
||||
define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
|
||||
; CHECK-LABEL: test_x86_vcvtph2ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvtph2ps %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_256_rrk:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvtph2ps %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8]
|
||||
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_256_rrk:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8]
|
||||
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_256_rrkz:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvtph2ps %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x13,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_256_rrkz:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x13,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly
|
||||
|
||||
declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8)
|
||||
|
||||
define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
|
||||
|
@ -4214,101 +4214,6 @@ define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i6
|
||||
ret <4 x i64> %res2
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
|
||||
; CHECK-LABEL: test_x86_vcvtph2ps_128:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0,<4 x float> %a1, i8 %mask) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_128_rrk:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvtph2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8]
|
||||
; X86-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_128_rrk:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8]
|
||||
; X64-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
|
||||
define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_128_rrkz:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvtph2ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x13,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_128_rrkz:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x13,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readonly
|
||||
|
||||
define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
|
||||
; CHECK-LABEL: test_x86_vcvtph2ps_256:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvtph2ps %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
|
||||
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0,<8 x float> %a1, i8 %mask) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_256_rrk:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvtph2ps %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8]
|
||||
; X86-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_256_rrk:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8]
|
||||
; X64-NEXT: vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_256_rrkz:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
|
||||
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
|
||||
; X86-NEXT: vcvtph2ps %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x13,0xc0]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_256_rrkz:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x13,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly
|
||||
|
||||
define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %src) {
|
||||
; X86-LABEL: test_x86_vcvtps2ph_128:
|
||||
; X86: # %bb.0:
|
||||
|
@ -1,20 +1,20 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx,+f16c | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx,+f16c | FileCheck %s --check-prefixes=CHECK,X86
|
||||
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c | FileCheck %s --check-prefixes=CHECK,X64
|
||||
|
||||
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/f16c-builtins.c
|
||||
|
||||
define float @test_cvtsh_ss(i16 %a0) nounwind {
|
||||
; X32-LABEL: test_cvtsh_ss:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: pushl %eax
|
||||
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vmovd %eax, %xmm0
|
||||
; X32-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; X32-NEXT: vmovss %xmm0, (%esp)
|
||||
; X32-NEXT: flds (%esp)
|
||||
; X32-NEXT: popl %eax
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_cvtsh_ss:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: pushl %eax
|
||||
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: vmovd %eax, %xmm0
|
||||
; X86-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; X86-NEXT: vmovss %xmm0, (%esp)
|
||||
; X86-NEXT: flds (%esp)
|
||||
; X86-NEXT: popl %eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_cvtsh_ss:
|
||||
; X64: # %bb.0:
|
||||
@ -30,21 +30,23 @@ define float @test_cvtsh_ss(i16 %a0) nounwind {
|
||||
%ins5 = insertelement <8 x i16> %ins4, i16 0, i32 5
|
||||
%ins6 = insertelement <8 x i16> %ins5, i16 0, i32 6
|
||||
%ins7 = insertelement <8 x i16> %ins6, i16 0, i32 7
|
||||
%cvt = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %ins7)
|
||||
%shuffle = shufflevector <8 x i16> %ins7, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%bc = bitcast <4 x i16> %shuffle to <4 x half>
|
||||
%cvt = fpext <4 x half> %bc to <4 x float>
|
||||
%res = extractelement <4 x float> %cvt, i32 0
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define i16 @test_cvtss_sh(float %a0) nounwind {
|
||||
; X32-LABEL: test_cvtss_sh:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X32-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X32-NEXT: vcvtps2ph $0, %xmm0, %xmm0
|
||||
; X32-NEXT: vmovd %xmm0, %eax
|
||||
; X32-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X32-NEXT: retl
|
||||
; X86-LABEL: test_cvtss_sh:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X86-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X86-NEXT: vcvtps2ph $0, %xmm0, %xmm0
|
||||
; X86-NEXT: vmovd %xmm0, %eax
|
||||
; X86-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_cvtss_sh:
|
||||
; X64: # %bb.0:
|
||||
@ -64,62 +66,44 @@ define i16 @test_cvtss_sh(float %a0) nounwind {
|
||||
}
|
||||
|
||||
define <4 x float> @test_mm_cvtph_ps(<2 x i64> %a0) nounwind {
|
||||
; X32-LABEL: test_mm_cvtph_ps:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_cvtph_ps:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
; CHECK-LABEL: test_mm_cvtph_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
|
||||
%res = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %arg0)
|
||||
%shuffle = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%bc = bitcast <4 x i16> %shuffle to <4 x half>
|
||||
%res = fpext <4 x half> %bc to <4 x float>
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_mm256_cvtph_ps(<2 x i64> %a0) nounwind {
|
||||
; X32-LABEL: test_mm256_cvtph_ps:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vcvtph2ps %xmm0, %ymm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm256_cvtph_ps:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %ymm0
|
||||
; X64-NEXT: retq
|
||||
; CHECK-LABEL: test_mm256_cvtph_ps:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvtph2ps %xmm0, %ymm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
|
||||
%res = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %arg0)
|
||||
%bc = bitcast <8 x i16> %arg0 to <8 x half>
|
||||
%res = fpext <8 x half> %bc to <8 x float>
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mm_cvtps_ph(<4 x float> %a0) nounwind {
|
||||
; X32-LABEL: test_mm_cvtps_ph:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vcvtps2ph $0, %xmm0, %xmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm_cvtps_ph:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtps2ph $0, %xmm0, %xmm0
|
||||
; X64-NEXT: retq
|
||||
; CHECK-LABEL: test_mm_cvtps_ph:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvtps2ph $0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%cvt = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
|
||||
%res = bitcast <8 x i16> %cvt to <2 x i64>
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @test_mm256_cvtps_ph(<8 x float> %a0) nounwind {
|
||||
; X32-LABEL: test_mm256_cvtps_ph:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vcvtps2ph $0, %ymm0, %xmm0
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: test_mm256_cvtps_ph:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtps2ph $0, %ymm0, %xmm0
|
||||
; X64-NEXT: vzeroupper
|
||||
; X64-NEXT: retq
|
||||
; CHECK-LABEL: test_mm256_cvtps_ph:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vcvtps2ph $0, %ymm0, %xmm0
|
||||
; CHECK-NEXT: vzeroupper
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%cvt = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
|
||||
%res = bitcast <8 x i16> %cvt to <2 x i64>
|
||||
ret <2 x i64> %res
|
||||
|
148
test/CodeGen/X86/f16c-intrinsics-upgrade.ll
Normal file
148
test/CodeGen/X86/f16c-intrinsics-upgrade.ll
Normal file
@ -0,0 +1,148 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx,+f16c -show-mc-encoding -disable-peephole | FileCheck %s --check-prefixes=AVX,X86
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c -show-mc-encoding -disable-peephole | FileCheck %s --check-prefixes=AVX,X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -show-mc-encoding -disable-peephole | FileCheck %s --check-prefixes=AVX512VL,X86-AVX512VL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -show-mc-encoding -disable-peephole | FileCheck %s --check-prefixes=AVX512VL,X64-AVX512VL
|
||||
|
||||
define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
|
||||
; AVX-LABEL: test_x86_vcvtph2ps_128:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcvtph2ps %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0xc0]
|
||||
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
;
|
||||
; AVX512VL-LABEL: test_x86_vcvtph2ps_128:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
|
||||
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly
|
||||
|
||||
define <4 x float> @test_x86_vcvtph2ps_128_m(<8 x i16>* nocapture %a) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_128_m:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvtph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_128_m:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X86-AVX512VL-LABEL: test_x86_vcvtph2ps_128_m:
|
||||
; X86-AVX512VL: # %bb.0:
|
||||
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-AVX512VL-NEXT: vcvtph2ps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x00]
|
||||
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_128_m:
|
||||
; X64-AVX512VL: # %bb.0:
|
||||
; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x07]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
%load = load <8 x i16>, <8 x i16>* %a
|
||||
%res = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %load) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
|
||||
; AVX-LABEL: test_x86_vcvtph2ps_256:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcvtph2ps %xmm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
|
||||
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
;
|
||||
; AVX512VL-LABEL: test_x86_vcvtph2ps_256:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
|
||||
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0) ; <<8 x float>> [#uses=1]
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly
|
||||
|
||||
define <8 x float> @test_x86_vcvtph2ps_256_m(<8 x i16>* nocapture %a) nounwind {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_256_m:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvtph2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_256_m:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtph2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X86-AVX512VL-LABEL: test_x86_vcvtph2ps_256_m:
|
||||
; X86-AVX512VL: # %bb.0:
|
||||
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-AVX512VL-NEXT: vcvtph2ps (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0x00]
|
||||
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_256_m:
|
||||
; X64-AVX512VL: # %bb.0:
|
||||
; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0x07]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
%load = load <8 x i16>, <8 x i16>* %a
|
||||
%res = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %load)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_vcvtph2ps_128_scalar(i64* %ptr) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_128_scalar:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvtph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_128_scalar:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X86-AVX512VL-LABEL: test_x86_vcvtph2ps_128_scalar:
|
||||
; X86-AVX512VL: # %bb.0:
|
||||
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-AVX512VL-NEXT: vcvtph2ps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x00]
|
||||
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_128_scalar:
|
||||
; X64-AVX512VL: # %bb.0:
|
||||
; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x07]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
%load = load i64, i64* %ptr
|
||||
%ins1 = insertelement <2 x i64> undef, i64 %load, i32 0
|
||||
%ins2 = insertelement <2 x i64> %ins1, i64 0, i32 1
|
||||
%bc = bitcast <2 x i64> %ins2 to <8 x i16>
|
||||
%res = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %bc) #2
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_vcvtph2ps_128_scalar2(i64* %ptr) {
|
||||
; X86-LABEL: test_x86_vcvtph2ps_128_scalar2:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvtph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x00]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_128_scalar2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X86-AVX512VL-LABEL: test_x86_vcvtph2ps_128_scalar2:
|
||||
; X86-AVX512VL: # %bb.0:
|
||||
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-AVX512VL-NEXT: vcvtph2ps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x00]
|
||||
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_128_scalar2:
|
||||
; X64-AVX512VL: # %bb.0:
|
||||
; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x07]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
%load = load i64, i64* %ptr
|
||||
%ins = insertelement <2 x i64> undef, i64 %load, i32 0
|
||||
%bc = bitcast <2 x i64> %ins to <8 x i16>
|
||||
%res = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %bc)
|
||||
ret <4 x float> %res
|
||||
}
|
@ -1,233 +1,49 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx,+f16c -show-mc-encoding -disable-peephole | FileCheck %s --check-prefix=X32
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c -show-mc-encoding -disable-peephole | FileCheck %s --check-prefix=X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -show-mc-encoding -disable-peephole | FileCheck %s --check-prefix=X32-AVX512VL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -show-mc-encoding -disable-peephole | FileCheck %s --check-prefix=X64-AVX512VL
|
||||
|
||||
define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
|
||||
; X32-LABEL: test_x86_vcvtph2ps_128:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vcvtph2ps %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0xc0]
|
||||
; X32-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X32-AVX512VL-LABEL: test_x86_vcvtph2ps_128:
|
||||
; X32-AVX512VL: # %bb.0:
|
||||
; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_128:
|
||||
; X64-AVX512VL: # %bb.0:
|
||||
; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly
|
||||
|
||||
define <4 x float> @test_x86_vcvtph2ps_128_m(<8 x i16>* nocapture %a) {
|
||||
; X32-LABEL: test_x86_vcvtph2ps_128_m:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-NEXT: vcvtph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x00]
|
||||
; X32-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_128_m:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X32-AVX512VL-LABEL: test_x86_vcvtph2ps_128_m:
|
||||
; X32-AVX512VL: # %bb.0:
|
||||
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x00]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_128_m:
|
||||
; X64-AVX512VL: # %bb.0:
|
||||
; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x07]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
%load = load <8 x i16>, <8 x i16>* %a
|
||||
%res = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %load) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
|
||||
; X32-LABEL: test_x86_vcvtph2ps_256:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vcvtph2ps %xmm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
|
||||
; X32-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtph2ps %xmm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X32-AVX512VL-LABEL: test_x86_vcvtph2ps_256:
|
||||
; X32-AVX512VL: # %bb.0:
|
||||
; X32-AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_256:
|
||||
; X64-AVX512VL: # %bb.0:
|
||||
; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
%res = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0) ; <<8 x float>> [#uses=1]
|
||||
ret <8 x float> %res
|
||||
}
|
||||
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly
|
||||
|
||||
define <8 x float> @test_x86_vcvtph2ps_256_m(<8 x i16>* nocapture %a) nounwind {
|
||||
; X32-LABEL: test_x86_vcvtph2ps_256_m:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-NEXT: vcvtph2ps (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0x00]
|
||||
; X32-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtph2ps_256_m:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtph2ps (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x13,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X32-AVX512VL-LABEL: test_x86_vcvtph2ps_256_m:
|
||||
; X32-AVX512VL: # %bb.0:
|
||||
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0x00]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtph2ps_256_m:
|
||||
; X64-AVX512VL: # %bb.0:
|
||||
; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0x07]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
%load = load <8 x i16>, <8 x i16>* %a
|
||||
%res = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %load)
|
||||
ret <8 x float> %res
|
||||
}
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx,+f16c -show-mc-encoding -disable-peephole | FileCheck %s --check-prefixes=AVX,X86
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c -show-mc-encoding -disable-peephole | FileCheck %s --check-prefixes=AVX,X64
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -show-mc-encoding -disable-peephole | FileCheck %s --check-prefixes=AVX512VL,X86-AVX512VL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -show-mc-encoding -disable-peephole | FileCheck %s --check-prefixes=AVX512VL,X64-AVX512VL
|
||||
|
||||
define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) {
|
||||
; X32-LABEL: test_x86_vcvtps2ph_128:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
|
||||
; X32-NEXT: retl # encoding: [0xc3]
|
||||
; AVX-LABEL: test_x86_vcvtps2ph_128:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
|
||||
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtps2ph_128:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128:
|
||||
; X32-AVX512VL: # %bb.0:
|
||||
; X32-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128:
|
||||
; X64-AVX512VL: # %bb.0:
|
||||
; X64-AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
; AVX512VL-LABEL: test_x86_vcvtps2ph_128:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0xc0,0x00]
|
||||
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) nounwind readonly
|
||||
|
||||
define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
|
||||
; X32-LABEL: test_x86_vcvtps2ph_256:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
|
||||
; X32-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X32-NEXT: retl # encoding: [0xc3]
|
||||
; AVX-LABEL: test_x86_vcvtps2ph_256:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
|
||||
; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtps2ph_256:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
|
||||
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_256:
|
||||
; X32-AVX512VL: # %bb.0:
|
||||
; X32-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
|
||||
; X32-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_256:
|
||||
; X64-AVX512VL: # %bb.0:
|
||||
; X64-AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
|
||||
; X64-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
; AVX512VL-LABEL: test_x86_vcvtps2ph_256:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0xc0,0x00]
|
||||
; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
|
||||
%res = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly
|
||||
|
||||
define <4 x float> @test_x86_vcvtps2ph_128_scalar(i64* %ptr) {
|
||||
; X32-LABEL: test_x86_vcvtps2ph_128_scalar:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-NEXT: vcvtph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x00]
|
||||
; X32-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtps2ph_128_scalar:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar:
|
||||
; X32-AVX512VL: # %bb.0:
|
||||
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x00]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar:
|
||||
; X64-AVX512VL: # %bb.0:
|
||||
; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x07]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
%load = load i64, i64* %ptr
|
||||
%ins1 = insertelement <2 x i64> undef, i64 %load, i32 0
|
||||
%ins2 = insertelement <2 x i64> %ins1, i64 0, i32 1
|
||||
%bc = bitcast <2 x i64> %ins2 to <8 x i16>
|
||||
%res = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %bc) #2
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_vcvtps2ph_128_scalar2(i64* %ptr) {
|
||||
; X32-LABEL: test_x86_vcvtps2ph_128_scalar2:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-NEXT: vcvtph2ps (%eax), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x00]
|
||||
; X32-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtps2ph_128_scalar2:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: vcvtph2ps (%rdi), %xmm0 # encoding: [0xc4,0xe2,0x79,0x13,0x07]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar2:
|
||||
; X32-AVX512VL: # %bb.0:
|
||||
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-AVX512VL-NEXT: vcvtph2ps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x00]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_scalar2:
|
||||
; X64-AVX512VL: # %bb.0:
|
||||
; X64-AVX512VL-NEXT: vcvtph2ps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0x07]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
%load = load i64, i64* %ptr
|
||||
%ins = insertelement <2 x i64> undef, i64 %load, i32 0
|
||||
%bc = bitcast <2 x i64> %ins to <8 x i16>
|
||||
%res = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %bc)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define void @test_x86_vcvtps2ph_256_m(<8 x i16>* nocapture %d, <8 x float> %a) nounwind {
|
||||
; X32-LABEL: test_x86_vcvtps2ph_256_m:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-NEXT: vcvtps2ph $3, %ymm0, (%eax) # encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03]
|
||||
; X32-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X32-NEXT: retl # encoding: [0xc3]
|
||||
; X86-LABEL: test_x86_vcvtps2ph_256_m:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvtps2ph $3, %ymm0, (%eax) # encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03]
|
||||
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtps2ph_256_m:
|
||||
; X64: # %bb.0: # %entry
|
||||
@ -235,12 +51,12 @@ define void @test_x86_vcvtps2ph_256_m(<8 x i16>* nocapture %d, <8 x float> %a) n
|
||||
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m:
|
||||
; X32-AVX512VL: # %bb.0: # %entry
|
||||
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03]
|
||||
; X32-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
; X86-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m:
|
||||
; X86-AVX512VL: # %bb.0: # %entry
|
||||
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-AVX512VL-NEXT: vcvtps2ph $3, %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x1d,0x00,0x03]
|
||||
; X86-AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
|
||||
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_256_m:
|
||||
; X64-AVX512VL: # %bb.0: # %entry
|
||||
@ -254,22 +70,22 @@ entry:
|
||||
}
|
||||
|
||||
define void @test_x86_vcvtps2ph_128_m(<4 x i16>* nocapture %d, <4 x float> %a) nounwind {
|
||||
; X32-LABEL: test_x86_vcvtps2ph_128_m:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
|
||||
; X32-NEXT: retl # encoding: [0xc3]
|
||||
; X86-LABEL: test_x86_vcvtps2ph_128_m:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtps2ph_128_m:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
|
||||
; X32-AVX512VL: # %bb.0: # %entry
|
||||
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
; X86-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
|
||||
; X86-AVX512VL: # %bb.0: # %entry
|
||||
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
|
||||
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m:
|
||||
; X64-AVX512VL: # %bb.0: # %entry
|
||||
@ -282,60 +98,60 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_x86_vcvtps2ph_128_m2(double* nocapture %hf4x16, <4 x float> %f4x32) #0 {
|
||||
; X32-LABEL: test_x86_vcvtps2ph_128_m2:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
|
||||
; X32-NEXT: retl # encoding: [0xc3]
|
||||
define void @test_x86_vcvtps2ph_128_m2(double* nocapture %hf4x16, <4 x float> %f4X86) #0 {
|
||||
; X86-LABEL: test_x86_vcvtps2ph_128_m2:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtps2ph_128_m2:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
|
||||
; X32-AVX512VL: # %bb.0: # %entry
|
||||
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
; X86-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
|
||||
; X86-AVX512VL: # %bb.0: # %entry
|
||||
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
|
||||
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m2:
|
||||
; X64-AVX512VL: # %bb.0: # %entry
|
||||
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
entry:
|
||||
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
|
||||
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4X86, i32 3)
|
||||
%1 = bitcast <8 x i16> %0 to <2 x double>
|
||||
%vecext = extractelement <2 x double> %1, i32 0
|
||||
store double %vecext, double* %hf4x16, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test_x86_vcvtps2ph_128_m3(i64* nocapture %hf4x16, <4 x float> %f4x32) #0 {
|
||||
; X32-LABEL: test_x86_vcvtps2ph_128_m3:
|
||||
; X32: # %bb.0: # %entry
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
|
||||
; X32-NEXT: retl # encoding: [0xc3]
|
||||
define void @test_x86_vcvtps2ph_128_m3(i64* nocapture %hf4x16, <4 x float> %f4X86) #0 {
|
||||
; X86-LABEL: test_x86_vcvtps2ph_128_m3:
|
||||
; X86: # %bb.0: # %entry
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-NEXT: vcvtps2ph $3, %xmm0, (%eax) # encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
|
||||
; X86-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-LABEL: test_x86_vcvtps2ph_128_m3:
|
||||
; X64: # %bb.0: # %entry
|
||||
; X64-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
|
||||
; X64-NEXT: retq # encoding: [0xc3]
|
||||
;
|
||||
; X32-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
|
||||
; X32-AVX512VL: # %bb.0: # %entry
|
||||
; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X32-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
|
||||
; X32-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
; X86-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
|
||||
; X86-AVX512VL: # %bb.0: # %entry
|
||||
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
|
||||
; X86-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x00,0x03]
|
||||
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
|
||||
;
|
||||
; X64-AVX512VL-LABEL: test_x86_vcvtps2ph_128_m3:
|
||||
; X64-AVX512VL: # %bb.0: # %entry
|
||||
; X64-AVX512VL-NEXT: vcvtps2ph $3, %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x1d,0x07,0x03]
|
||||
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
|
||||
entry:
|
||||
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
|
||||
%0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4X86, i32 3)
|
||||
%1 = bitcast <8 x i16> %0 to <2 x i64>
|
||||
%vecext = extractelement <2 x i64> %1, i32 0
|
||||
store i64 %vecext, i64* %hf4x16, align 8
|
||||
|
@ -5,14 +5,16 @@ declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
|
||||
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
|
||||
|
||||
;
|
||||
; Vector Demanded Bits
|
||||
; Vector Demanded Elts
|
||||
;
|
||||
|
||||
; Only bottom 4 elements required.
|
||||
define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
|
||||
; CHECK-LABEL: @demand_vcvtph2ps_128(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> [[A:%.*]])
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <4 x half>
|
||||
; CHECK-NEXT: [[CVTPH2PS:%.*]] = fpext <4 x half> [[TMP2]] to <4 x float>
|
||||
; CHECK-NEXT: ret <4 x float> [[CVTPH2PS]]
|
||||
;
|
||||
%1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||
%2 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
|
||||
@ -23,8 +25,9 @@ define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
|
||||
define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) {
|
||||
; CHECK-LABEL: @demand_vcvtph2ps_256(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> [[TMP1]])
|
||||
; CHECK-NEXT: ret <8 x float> [[TMP2]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <8 x half>
|
||||
; CHECK-NEXT: [[CVTPH2PS:%.*]] = fpext <8 x half> [[TMP2]] to <8 x float>
|
||||
; CHECK-NEXT: ret <8 x float> [[CVTPH2PS]]
|
||||
;
|
||||
%1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
|
||||
%2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
|
||||
|
Loading…
Reference in New Issue
Block a user