mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[X86] Remove the scalar intrinsics for fadd/fsub/fdiv/fmul
Summary: These intrinsics have been unused by clang for a while. This patch removes them. We auto-upgrade them to extractelements, a scalar operation, and then an insertelement. This matches the sequence used by clang's intrinsic file. Reviewers: zvi, delena, RKSimon Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D26660 llvm-svn: 287083
This commit is contained in:
parent
af42944969
commit
9352cc47c5
@ -145,18 +145,6 @@ let TargetPrefix = "x86" in {
|
||||
|
||||
// Arithmetic ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_sse_add_ss : // TODO: Remove this intrinsic.
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
||||
llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_sse_sub_ss : // TODO: Remove this intrinsic.
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
||||
llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_sse_mul_ss : // TODO: Remove this intrinsic.
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
||||
llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_sse_div_ss : // TODO: Remove this intrinsic.
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
|
||||
llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
|
||||
[IntrNoMem]>;
|
||||
@ -287,18 +275,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
|
||||
// FP arithmetic ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_sse2_add_sd : // TODO: Remove this intrinsic.
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
||||
llvm_v2f64_ty], [IntrNoMem]>;
|
||||
def int_x86_sse2_sub_sd : // TODO: Remove this intrinsic.
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
||||
llvm_v2f64_ty], [IntrNoMem]>;
|
||||
def int_x86_sse2_mul_sd : // TODO: Remove this intrinsic.
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
||||
llvm_v2f64_ty], [IntrNoMem]>;
|
||||
def int_x86_sse2_div_sd : // TODO: Remove this intrinsic.
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
|
||||
llvm_v2f64_ty], [IntrNoMem]>;
|
||||
def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
@ -238,6 +238,14 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
Name.startswith("avx2.pcmpgt.") || // Added in 3.1
|
||||
Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
|
||||
Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
|
||||
Name == "sse.add.ss" || // Added in 4.0
|
||||
Name == "sse2.add.sd" || // Added in 4.0
|
||||
Name == "sse.sub.ss" || // Added in 4.0
|
||||
Name == "sse2.sub.sd" || // Added in 4.0
|
||||
Name == "sse.mul.ss" || // Added in 4.0
|
||||
Name == "sse2.mul.sd" || // Added in 4.0
|
||||
Name == "sse.div.ss" || // Added in 4.0
|
||||
Name == "sse2.div.sd" || // Added in 4.0
|
||||
Name == "sse41.pmaxsb" || // Added in 3.9
|
||||
Name == "sse2.pmaxs.w" || // Added in 3.9
|
||||
Name == "sse41.pmaxsd" || // Added in 3.9
|
||||
@ -732,6 +740,42 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
|
||||
"pcmpgt");
|
||||
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
|
||||
} else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
|
||||
Type *I32Ty = Type::getInt32Ty(C);
|
||||
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
|
||||
ConstantInt::get(I32Ty, 0));
|
||||
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
|
||||
ConstantInt::get(I32Ty, 0));
|
||||
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
|
||||
Builder.CreateFAdd(Elt0, Elt1),
|
||||
ConstantInt::get(I32Ty, 0));
|
||||
} else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
|
||||
Type *I32Ty = Type::getInt32Ty(C);
|
||||
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
|
||||
ConstantInt::get(I32Ty, 0));
|
||||
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
|
||||
ConstantInt::get(I32Ty, 0));
|
||||
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
|
||||
Builder.CreateFSub(Elt0, Elt1),
|
||||
ConstantInt::get(I32Ty, 0));
|
||||
} else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
|
||||
Type *I32Ty = Type::getInt32Ty(C);
|
||||
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
|
||||
ConstantInt::get(I32Ty, 0));
|
||||
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
|
||||
ConstantInt::get(I32Ty, 0));
|
||||
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
|
||||
Builder.CreateFMul(Elt0, Elt1),
|
||||
ConstantInt::get(I32Ty, 0));
|
||||
} else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
|
||||
Type *I32Ty = Type::getInt32Ty(C);
|
||||
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
|
||||
ConstantInt::get(I32Ty, 0));
|
||||
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
|
||||
ConstantInt::get(I32Ty, 0));
|
||||
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
|
||||
Builder.CreateFDiv(Elt0, Elt1),
|
||||
ConstantInt::get(I32Ty, 0));
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
|
||||
Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
|
||||
} else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
|
||||
|
@ -259,26 +259,24 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
}
|
||||
|
||||
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
|
||||
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
string asm, string SSEVer, string FPSizeStr,
|
||||
Operand memopr, ComplexPattern mem_cpat,
|
||||
Domain d, OpndItins itins, bit Is2Addr = 1> {
|
||||
let isCodeGenOnly = 1 in {
|
||||
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator Int, RegisterClass RC,
|
||||
string asm, Operand memopr,
|
||||
ComplexPattern mem_cpat, Domain d,
|
||||
OpndItins itins, bit Is2Addr = 1> {
|
||||
let isCodeGenOnly = 1, hasSideEffects = 0 in {
|
||||
def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (!cast<Intrinsic>(
|
||||
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
|
||||
RC:$src1, RC:$src2))], itins.rr, d>,
|
||||
[(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr, d>,
|
||||
Sched<[itins.Sched]>;
|
||||
let mayLoad = 1 in
|
||||
def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
|
||||
!if(Is2Addr,
|
||||
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
|
||||
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
|
||||
SSEVer, "_", OpcodeStr, FPSizeStr))
|
||||
RC:$src1, mem_cpat:$src2))], itins.rm, d>,
|
||||
[(set RC:$dst, (Int RC:$src1, mem_cpat:$src2))], itins.rm, d>,
|
||||
Sched<[itins.Sched.Folded, ReadAfterLd]>;
|
||||
}
|
||||
}
|
||||
@ -3064,20 +3062,22 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
}
|
||||
|
||||
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
|
||||
SDPatternOperator IntSS,
|
||||
SDPatternOperator IntSD,
|
||||
SizeItins itins> {
|
||||
defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
|
||||
defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, IntSS, VR128,
|
||||
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
|
||||
SSEPackedSingle, itins.s, 0>, XS, VEX_4V, VEX_LIG;
|
||||
defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
|
||||
defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, IntSD, VR128,
|
||||
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
|
||||
SSEPackedDouble, itins.d, 0>, XD, VEX_4V, VEX_LIG;
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
|
||||
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, IntSS, VR128,
|
||||
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
|
||||
SSEPackedSingle, itins.s>, XS;
|
||||
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
|
||||
!strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
|
||||
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, IntSD, VR128,
|
||||
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
|
||||
SSEPackedDouble, itins.d>, XD;
|
||||
}
|
||||
}
|
||||
@ -3085,23 +3085,29 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
|
||||
// Binary Arithmetic instructions
|
||||
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
|
||||
basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
|
||||
basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
|
||||
basic_sse12_fp_binop_s_int<0x58, "add", null_frag, null_frag,
|
||||
SSE_ALU_ITINS_S>;
|
||||
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
|
||||
basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
|
||||
basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
|
||||
basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, null_frag,
|
||||
SSE_MUL_ITINS_S>;
|
||||
let isCommutable = 0 in {
|
||||
defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
|
||||
basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
|
||||
basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
|
||||
basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, null_frag,
|
||||
SSE_ALU_ITINS_S>;
|
||||
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
|
||||
basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
|
||||
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
|
||||
basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, null_frag,
|
||||
SSE_DIV_ITINS_S>;
|
||||
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
|
||||
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
|
||||
basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
|
||||
basic_sse12_fp_binop_s_int<0x5F, "max", int_x86_sse_max_ss,
|
||||
int_x86_sse2_max_sd, SSE_ALU_ITINS_S>;
|
||||
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
|
||||
basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
|
||||
basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
|
||||
basic_sse12_fp_binop_s_int<0x5D, "min", int_x86_sse_min_ss,
|
||||
int_x86_sse2_min_sd, SSE_ALU_ITINS_S>;
|
||||
}
|
||||
|
||||
let isCodeGenOnly = 1 in {
|
||||
|
@ -1751,17 +1751,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::x86_sse_add_ss:
|
||||
case Intrinsic::x86_sse_sub_ss:
|
||||
case Intrinsic::x86_sse_mul_ss:
|
||||
case Intrinsic::x86_sse_div_ss:
|
||||
case Intrinsic::x86_sse_min_ss:
|
||||
case Intrinsic::x86_sse_max_ss:
|
||||
case Intrinsic::x86_sse_cmp_ss:
|
||||
case Intrinsic::x86_sse2_add_sd:
|
||||
case Intrinsic::x86_sse2_sub_sd:
|
||||
case Intrinsic::x86_sse2_mul_sd:
|
||||
case Intrinsic::x86_sse2_div_sd:
|
||||
case Intrinsic::x86_sse2_min_sd:
|
||||
case Intrinsic::x86_sse2_max_sd:
|
||||
case Intrinsic::x86_sse2_cmp_sd: {
|
||||
|
@ -1274,17 +1274,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
||||
|
||||
// Binary scalar-as-vector operations that work column-wise. A dest element
|
||||
// is a function of the corresponding input elements from the two inputs.
|
||||
case Intrinsic::x86_sse_add_ss:
|
||||
case Intrinsic::x86_sse_sub_ss:
|
||||
case Intrinsic::x86_sse_mul_ss:
|
||||
case Intrinsic::x86_sse_div_ss:
|
||||
case Intrinsic::x86_sse_min_ss:
|
||||
case Intrinsic::x86_sse_max_ss:
|
||||
case Intrinsic::x86_sse_cmp_ss:
|
||||
case Intrinsic::x86_sse2_add_sd:
|
||||
case Intrinsic::x86_sse2_sub_sd:
|
||||
case Intrinsic::x86_sse2_mul_sd:
|
||||
case Intrinsic::x86_sse2_div_sd:
|
||||
case Intrinsic::x86_sse2_min_sd:
|
||||
case Intrinsic::x86_sse2_max_sd:
|
||||
case Intrinsic::x86_sse2_cmp_sd:
|
||||
@ -1297,62 +1289,6 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
||||
UndefElts2, Depth + 1);
|
||||
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
|
||||
|
||||
// If only the low elt is demanded and this is a scalarizable intrinsic,
|
||||
// scalarize it now.
|
||||
if (DemandedElts == 1) {
|
||||
switch (II->getIntrinsicID()) {
|
||||
default: break;
|
||||
case Intrinsic::x86_sse_add_ss:
|
||||
case Intrinsic::x86_sse_sub_ss:
|
||||
case Intrinsic::x86_sse_mul_ss:
|
||||
case Intrinsic::x86_sse_div_ss:
|
||||
case Intrinsic::x86_sse2_add_sd:
|
||||
case Intrinsic::x86_sse2_sub_sd:
|
||||
case Intrinsic::x86_sse2_mul_sd:
|
||||
case Intrinsic::x86_sse2_div_sd:
|
||||
// TODO: Lower MIN/MAX/etc.
|
||||
Value *LHS = II->getArgOperand(0);
|
||||
Value *RHS = II->getArgOperand(1);
|
||||
// Extract the element as scalars.
|
||||
LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
|
||||
ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
|
||||
RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
|
||||
ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
|
||||
|
||||
switch (II->getIntrinsicID()) {
|
||||
default: llvm_unreachable("Case stmts out of sync!");
|
||||
case Intrinsic::x86_sse_add_ss:
|
||||
case Intrinsic::x86_sse2_add_sd:
|
||||
TmpV = InsertNewInstWith(BinaryOperator::CreateFAdd(LHS, RHS,
|
||||
II->getName()), *II);
|
||||
break;
|
||||
case Intrinsic::x86_sse_sub_ss:
|
||||
case Intrinsic::x86_sse2_sub_sd:
|
||||
TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
|
||||
II->getName()), *II);
|
||||
break;
|
||||
case Intrinsic::x86_sse_mul_ss:
|
||||
case Intrinsic::x86_sse2_mul_sd:
|
||||
TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
|
||||
II->getName()), *II);
|
||||
break;
|
||||
case Intrinsic::x86_sse_div_ss:
|
||||
case Intrinsic::x86_sse2_div_sd:
|
||||
TmpV = InsertNewInstWith(BinaryOperator::CreateFDiv(LHS, RHS,
|
||||
II->getName()), *II);
|
||||
break;
|
||||
}
|
||||
|
||||
Instruction *New =
|
||||
InsertElementInst::Create(
|
||||
UndefValue::get(II->getType()), TmpV,
|
||||
ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
|
||||
II->getName());
|
||||
InsertNewInstWith(New, *II);
|
||||
return New;
|
||||
}
|
||||
}
|
||||
|
||||
// If lowest element of a scalar op isn't used then use Arg0.
|
||||
if (DemandedElts.getLoBits(1) != 1)
|
||||
return II->getArgOperand(0);
|
||||
|
@ -68,17 +68,6 @@ define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
|
||||
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK-LABEL: test_x86_sse2_add_sd:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x58,0xc1]
|
||||
; CHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK-LABEL: test_x86_sse2_cmp_pd:
|
||||
; CHECK: ## BB#0:
|
||||
@ -390,17 +379,6 @@ define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
|
||||
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK-LABEL: test_x86_sse2_div_sd:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5e,0xc1]
|
||||
; CHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; AVX-LABEL: test_x86_sse2_max_pd:
|
||||
@ -469,17 +447,6 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
|
||||
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK-LABEL: test_x86_sse2_mul_sd:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x59,0xc1]
|
||||
; CHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; AVX-LABEL: test_x86_sse2_packssdw_128:
|
||||
; AVX: ## BB#0:
|
||||
@ -1121,17 +1088,6 @@ define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
|
||||
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; CHECK-LABEL: test_x86_sse2_sub_sd:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5c,0xc1]
|
||||
; CHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; AVX-LABEL: test_x86_sse2_ucomieq_sd:
|
||||
; AVX: ## BB#0:
|
||||
@ -1959,17 +1915,6 @@ define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1
|
||||
}
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK-LABEL: test_x86_sse_add_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0xc1]
|
||||
; CHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK-LABEL: test_x86_sse_cmp_ps:
|
||||
; CHECK: ## BB#0:
|
||||
@ -2170,17 +2115,6 @@ define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
|
||||
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK-LABEL: test_x86_sse_div_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5e,0xc1]
|
||||
; CHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define void @test_x86_sse_ldmxcsr(i8* %a0) {
|
||||
; CHECK-LABEL: test_x86_sse_ldmxcsr:
|
||||
; CHECK: ## BB#0:
|
||||
@ -2260,17 +2194,6 @@ declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
||||
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK-LABEL: test_x86_sse_mul_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0xc1]
|
||||
; CHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
|
||||
; AVX-LABEL: test_x86_sse_rcp_ps:
|
||||
; AVX: ## BB#0:
|
||||
@ -2359,17 +2282,6 @@ define void @test_x86_sse_stmxcsr(i8* %a0) {
|
||||
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK-LABEL: test_x86_sse_sub_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5c,0xc1]
|
||||
; CHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; AVX-LABEL: test_x86_sse_ucomieq_ss:
|
||||
; AVX: ## BB#0:
|
||||
@ -3805,8 +3717,8 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
|
||||
; AVX-LABEL: movnt_dq:
|
||||
; AVX: ## BB#0:
|
||||
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; AVX-NEXT: vpaddq LCPI254_0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd4,0x05,A,A,A,A]
|
||||
; AVX-NEXT: ## fixup A - offset: 4, value: LCPI254_0, kind: FK_Data_4
|
||||
; AVX-NEXT: vpaddq LCPI246_0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd4,0x05,A,A,A,A]
|
||||
; AVX-NEXT: ## fixup A - offset: 4, value: LCPI246_0, kind: FK_Data_4
|
||||
; AVX-NEXT: vmovntdq %ymm0, (%eax) ## encoding: [0xc5,0xfd,0xe7,0x00]
|
||||
; AVX-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
|
||||
; AVX-NEXT: retl ## encoding: [0xc3]
|
||||
@ -3814,8 +3726,8 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
|
||||
; AVX512VL-LABEL: movnt_dq:
|
||||
; AVX512VL: ## BB#0:
|
||||
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
|
||||
; AVX512VL-NEXT: vpaddq LCPI254_0, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0x05,A,A,A,A]
|
||||
; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI254_0, kind: FK_Data_4
|
||||
; AVX512VL-NEXT: vpaddq LCPI246_0, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0x05,A,A,A,A]
|
||||
; AVX512VL-NEXT: ## fixup A - offset: 6, value: LCPI246_0, kind: FK_Data_4
|
||||
; AVX512VL-NEXT: vmovntdq %ymm0, (%eax) ## encoding: [0x62,0xf1,0x7d,0x28,0xe7,0x00]
|
||||
; AVX512VL-NEXT: retl ## encoding: [0xc3]
|
||||
%a2 = add <2 x i64> %a1, <i64 1, i64 1>
|
||||
|
@ -1,4 +1,3 @@
|
||||
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
|
||||
|
||||
@ -25,3 +24,103 @@ define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
|
||||
declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; SSE-LABEL: test_x86_sse_add_ss:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: addss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_x86_sse_add_ss:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vaddss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0xc1]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse_add_ss:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x58,0xc1]
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
; CHECK-LABEL: test_x86_sse_add_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: addss %xmm1, %xmm0
|
||||
; CHECK-NEXT: retl
|
||||
%res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; SSE-LABEL: test_x86_sse_sub_ss:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: subss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5c,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_x86_sse_sub_ss:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5c,0xc1]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse_sub_ss:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5c,0xc1]
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
; CHECK-LABEL: test_x86_sse_sub_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: subss %xmm1, %xmm0
|
||||
; CHECK-NEXT: retl
|
||||
%res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; SSE-LABEL: test_x86_sse_mul_ss:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_x86_sse_mul_ss:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vmulss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0xc1]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse_mul_ss:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x59,0xc1]
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
; CHECK-LABEL: test_x86_sse_mul_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: mulss %xmm1, %xmm0
|
||||
; CHECK-NEXT: retl
|
||||
%res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; SSE-LABEL: test_x86_sse_div_ss:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: divss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5e,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_x86_sse_div_ss:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vdivss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5e,0xc1]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse_div_ss:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5e,0xc1]
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
; CHECK-LABEL: test_x86_sse_div_ss:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: divss %xmm1, %xmm0
|
||||
; CHECK-NEXT: retl
|
||||
%res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
|
@ -4,22 +4,6 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx2 -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=AVX2
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=SKX
|
||||
|
||||
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; SSE-LABEL: test_x86_sse_add_ss:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: addss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; VCHECK-LABEL: test_x86_sse_add_ss:
|
||||
; VCHECK: ## BB#0:
|
||||
; VCHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0xc1]
|
||||
; VCHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
|
||||
; SSE-LABEL: test_x86_sse_cmp_ps:
|
||||
; SSE: ## BB#0:
|
||||
@ -292,22 +276,6 @@ define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
|
||||
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; SSE-LABEL: test_x86_sse_div_ss:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: divss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5e,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; VCHECK-LABEL: test_x86_sse_div_ss:
|
||||
; VCHECK: ## BB#0:
|
||||
; VCHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5e,0xc1]
|
||||
; VCHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define void @test_x86_sse_ldmxcsr(i8* %a0) {
|
||||
; SSE-LABEL: test_x86_sse_ldmxcsr:
|
||||
; SSE: ## BB#0:
|
||||
@ -418,22 +386,6 @@ declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
||||
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; SSE-LABEL: test_x86_sse_mul_ss:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; VCHECK-LABEL: test_x86_sse_mul_ss:
|
||||
; VCHECK: ## BB#0:
|
||||
; VCHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0xc1]
|
||||
; VCHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
|
||||
; SSE-LABEL: test_x86_sse_rcp_ps:
|
||||
; SSE: ## BB#0:
|
||||
@ -558,22 +510,6 @@ define void @test_x86_sse_stmxcsr(i8* %a0) {
|
||||
declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
|
||||
|
||||
|
||||
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; SSE-LABEL: test_x86_sse_sub_ss:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: subss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5c,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; VCHECK-LABEL: test_x86_sse_sub_ss:
|
||||
; VCHECK: ## BB#0:
|
||||
; VCHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5c,0xc1]
|
||||
; VCHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
|
||||
ret <4 x float> %res
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
|
||||
; SSE-LABEL: test_x86_sse_ucomieq_ss:
|
||||
; SSE: ## BB#0:
|
||||
|
@ -186,3 +186,104 @@ define <8 x i16> @min_epi16(<8 x i16> %a0, <8 x i16> %a1) {
|
||||
}
|
||||
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
|
||||
|
||||
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; SSE-LABEL: test_x86_sse2_add_sd:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: addsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x58,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_x86_sse2_add_sd:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x58,0xc1]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse2_add_sd:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x58,0xc1]
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
; CHECK-LABEL: test_x86_sse2_add_sd:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: addsd %xmm1, %xmm0
|
||||
; CHECK-NEXT: retl
|
||||
%res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; SSE-LABEL: test_x86_sse2_sub_sd:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: subsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5c,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_x86_sse2_sub_sd:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5c,0xc1]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse2_sub_sd:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x5c,0xc1]
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
; CHECK-LABEL: test_x86_sse2_sub_sd:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: subsd %xmm1, %xmm0
|
||||
; CHECK-NEXT: retl
|
||||
%res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; SSE-LABEL: test_x86_sse2_mul_sd:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: mulsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x59,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_x86_sse2_mul_sd:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x59,0xc1]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse2_mul_sd:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x59,0xc1]
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
; CHECK-LABEL: test_x86_sse2_mul_sd:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: mulsd %xmm1, %xmm0
|
||||
; CHECK-NEXT: retl
|
||||
%res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; SSE-LABEL: test_x86_sse2_div_sd:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: divsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5e,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; AVX2-LABEL: test_x86_sse2_div_sd:
|
||||
; AVX2: ## BB#0:
|
||||
; AVX2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5e,0xc1]
|
||||
; AVX2-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; SKX-LABEL: test_x86_sse2_div_sd:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x5e,0xc1]
|
||||
; SKX-NEXT: retl ## encoding: [0xc3]
|
||||
; CHECK-LABEL: test_x86_sse2_div_sd:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: divsd %xmm1, %xmm0
|
||||
; CHECK-NEXT: retl
|
||||
%res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
|
||||
|
@ -3,22 +3,6 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx2 -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=AVX2
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=SKX
|
||||
|
||||
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; SSE-LABEL: test_x86_sse2_add_sd:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: addsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x58,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; VCHECK-LABEL: test_x86_sse2_add_sd:
|
||||
; VCHECK: ## BB#0:
|
||||
; VCHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x58,0xc1]
|
||||
; VCHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; SSE-LABEL: test_x86_sse2_cmp_pd:
|
||||
; SSE: ## BB#0:
|
||||
@ -584,23 +568,6 @@ define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
|
||||
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; SSE-LABEL: test_x86_sse2_div_sd:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: divsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5e,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; VCHECK-LABEL: test_x86_sse2_div_sd:
|
||||
; VCHECK: ## BB#0:
|
||||
; VCHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5e,0xc1]
|
||||
; VCHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; SSE-LABEL: test_x86_sse2_max_pd:
|
||||
; SSE: ## BB#0:
|
||||
@ -693,22 +660,6 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
|
||||
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; SSE-LABEL: test_x86_sse2_mul_sd:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: mulsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x59,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; VCHECK-LABEL: test_x86_sse2_mul_sd:
|
||||
; VCHECK: ## BB#0:
|
||||
; VCHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x59,0xc1]
|
||||
; VCHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; SSE-LABEL: test_x86_sse2_packssdw_128:
|
||||
; SSE: ## BB#0:
|
||||
@ -1555,22 +1506,6 @@ define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
|
||||
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; SSE-LABEL: test_x86_sse2_sub_sd:
|
||||
; SSE: ## BB#0:
|
||||
; SSE-NEXT: subsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5c,0xc1]
|
||||
; SSE-NEXT: retl ## encoding: [0xc3]
|
||||
;
|
||||
; VCHECK-LABEL: test_x86_sse2_sub_sd:
|
||||
; VCHECK: ## BB#0:
|
||||
; VCHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5c,0xc1]
|
||||
; VCHECK-NEXT: retl ## encoding: [0xc3]
|
||||
%res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
|
||||
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
|
||||
; SSE-LABEL: test_x86_sse2_ucomieq_sd:
|
||||
; SSE: ## BB#0:
|
||||
|
@ -10,9 +10,10 @@ define i16 @test1(float %f) nounwind {
|
||||
; X32-LABEL: test1:
|
||||
; X32: ## BB#0:
|
||||
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32-NEXT: xorps %xmm1, %xmm1
|
||||
; X32-NEXT: subss LCPI0_0, %xmm0
|
||||
; X32-NEXT: addss LCPI0_0, %xmm0
|
||||
; X32-NEXT: mulss LCPI0_1, %xmm0
|
||||
; X32-NEXT: xorps %xmm1, %xmm1
|
||||
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X32-NEXT: minss LCPI0_2, %xmm0
|
||||
; X32-NEXT: maxss %xmm1, %xmm0
|
||||
; X32-NEXT: cvttss2si %xmm0, %eax
|
||||
@ -21,46 +22,60 @@ define i16 @test1(float %f) nounwind {
|
||||
;
|
||||
; X64-LABEL: test1:
|
||||
; X64: ## BB#0:
|
||||
; X64-NEXT: addss {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: mulss {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: xorps %xmm1, %xmm1
|
||||
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X64-NEXT: subss {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: mulss {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: minss {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: maxss %xmm1, %xmm0
|
||||
; X64-NEXT: cvttss2si %xmm0, %eax
|
||||
; X64-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X32_AVX-LABEL: test1:
|
||||
; X32_AVX: ## BB#0:
|
||||
; X32_AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32_AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X32_AVX-NEXT: vsubss LCPI0_0, %xmm0, %xmm0
|
||||
; X32_AVX-NEXT: vmulss LCPI0_1, %xmm0, %xmm0
|
||||
; X32_AVX-NEXT: vminss LCPI0_2, %xmm0, %xmm0
|
||||
; X32_AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
|
||||
; X32_AVX-NEXT: vcvttss2si %xmm0, %eax
|
||||
; X32_AVX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
|
||||
; X32_AVX-NEXT: retl
|
||||
; X32_AVX1-LABEL: test1:
|
||||
; X32_AVX1: ## BB#0:
|
||||
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X32_AVX1-NEXT: vaddss LCPI0_0, %xmm0, %xmm0
|
||||
; X32_AVX1-NEXT: vmulss LCPI0_1, %xmm0, %xmm0
|
||||
; X32_AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X32_AVX1-NEXT: vminss LCPI0_2, %xmm0, %xmm0
|
||||
; X32_AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0
|
||||
; X32_AVX1-NEXT: vcvttss2si %xmm0, %eax
|
||||
; X32_AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
|
||||
; X32_AVX1-NEXT: retl
|
||||
;
|
||||
; X64_AVX1-LABEL: test1:
|
||||
; X64_AVX1: ## BB#0:
|
||||
; X64_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X64_AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X64_AVX1-NEXT: vsubss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX1-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX1-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X64_AVX1-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX1-NEXT: vmaxss %xmm1, %xmm0, %xmm0
|
||||
; X64_AVX1-NEXT: vcvttss2si %xmm0, %eax
|
||||
; X64_AVX1-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
|
||||
; X64_AVX1-NEXT: retq
|
||||
;
|
||||
; X32_AVX512-LABEL: test1:
|
||||
; X32_AVX512: ## BB#0:
|
||||
; X32_AVX512-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; X32_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X32_AVX512-NEXT: vaddss LCPI0_0, %xmm0, %xmm0
|
||||
; X32_AVX512-NEXT: vmulss LCPI0_1, %xmm0, %xmm0
|
||||
; X32_AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X32_AVX512-NEXT: vminss LCPI0_2, %xmm0, %xmm0
|
||||
; X32_AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
|
||||
; X32_AVX512-NEXT: vcvttss2si %xmm0, %eax
|
||||
; X32_AVX512-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
|
||||
; X32_AVX512-NEXT: retl
|
||||
;
|
||||
; X64_AVX512-LABEL: test1:
|
||||
; X64_AVX512: ## BB#0:
|
||||
; X64_AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; X64_AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X64_AVX512-NEXT: vsubss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX512-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
||||
; X64_AVX512-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0
|
||||
; X64_AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
|
||||
; X64_AVX512-NEXT: vcvttss2si %xmm0, %eax
|
||||
|
@ -89,18 +89,6 @@ define float @test_rsqrt_ss_3(float %a) {
|
||||
ret float %6
|
||||
}
|
||||
|
||||
define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: @test_add_ss(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %b)
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %3)
|
||||
ret <4 x float> %4
|
||||
}
|
||||
|
||||
define float @test_add_ss_0(float %a, float %b) {
|
||||
; CHECK-LABEL: @test_add_ss_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd float %a, %b
|
||||
@ -133,18 +121,6 @@ define float @test_add_ss_1(float %a, float %b) {
|
||||
ret float %7
|
||||
}
|
||||
|
||||
define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: @test_sub_ss(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %b)
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %3)
|
||||
ret <4 x float> %4
|
||||
}
|
||||
|
||||
define float @test_sub_ss_0(float %a, float %b) {
|
||||
; CHECK-LABEL: @test_sub_ss_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub float %a, %b
|
||||
@ -177,18 +153,6 @@ define float @test_sub_ss_2(float %a, float %b) {
|
||||
ret float %7
|
||||
}
|
||||
|
||||
define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: @test_mul_ss(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %b)
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %3)
|
||||
ret <4 x float> %4
|
||||
}
|
||||
|
||||
define float @test_mul_ss_0(float %a, float %b) {
|
||||
; CHECK-LABEL: @test_mul_ss_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul float %a, %b
|
||||
@ -221,18 +185,6 @@ define float @test_mul_ss_3(float %a, float %b) {
|
||||
ret float %7
|
||||
}
|
||||
|
||||
define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK-LABEL: @test_div_ss(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %b)
|
||||
; CHECK-NEXT: ret <4 x float> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
|
||||
%2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
|
||||
%3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
|
||||
%4 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %3)
|
||||
ret <4 x float> %4
|
||||
}
|
||||
|
||||
define float @test_div_ss_0(float %a, float %b) {
|
||||
; CHECK-LABEL: @test_div_ss_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fdiv float %a, %b
|
||||
|
@ -27,16 +27,6 @@ define double @test_sqrt_sd_1(double %a) {
|
||||
ret double %4
|
||||
}
|
||||
|
||||
define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
|
||||
; CHECK-LABEL: @test_add_sd(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> %b)
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> %1)
|
||||
ret <2 x double> %2
|
||||
}
|
||||
|
||||
define double @test_add_sd_0(double %a, double %b) {
|
||||
; CHECK-LABEL: @test_add_sd_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fadd double %a, %b
|
||||
@ -64,16 +54,6 @@ define double @test_add_sd_1(double %a, double %b) {
|
||||
ret double %6
|
||||
}
|
||||
|
||||
define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
|
||||
; CHECK-LABEL: @test_sub_sd(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> %b)
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> %1)
|
||||
ret <2 x double> %2
|
||||
}
|
||||
|
||||
define double @test_sub_sd_0(double %a, double %b) {
|
||||
; CHECK-LABEL: @test_sub_sd_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fsub double %a, %b
|
||||
@ -101,16 +81,6 @@ define double @test_sub_sd_1(double %a, double %b) {
|
||||
ret double %6
|
||||
}
|
||||
|
||||
define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
|
||||
; CHECK-LABEL: @test_mul_sd(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> %b)
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> %1)
|
||||
ret <2 x double> %2
|
||||
}
|
||||
|
||||
define double @test_mul_sd_0(double %a, double %b) {
|
||||
; CHECK-LABEL: @test_mul_sd_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fmul double %a, %b
|
||||
@ -138,16 +108,6 @@ define double @test_mul_sd_1(double %a, double %b) {
|
||||
ret double %6
|
||||
}
|
||||
|
||||
define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
|
||||
; CHECK-LABEL: @test_div_sd(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> %b)
|
||||
; CHECK-NEXT: ret <2 x double> [[TMP1]]
|
||||
;
|
||||
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
|
||||
%2 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> %1)
|
||||
ret <2 x double> %2
|
||||
}
|
||||
|
||||
define double @test_div_sd_0(double %a, double %b) {
|
||||
; CHECK-LABEL: @test_div_sd_0(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = fdiv double %a, %b
|
||||
|
Loading…
x
Reference in New Issue
Block a user