mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[x86] Fix a pretty horrible bug and inconsistency in the x86 asm
parsing (and latent bug in the instruction definitions). This is effectively a revert of r136287 which tried to address a specific and narrow case of immediate operands failing to be accepted by x86 instructions with a pretty heavy hammer: it introduced a new kind of operand that behaved differently. All of that is removed with this commit, but the test cases are both preserved and enhanced. The core problem that r136287 and this commit are trying to handle is that gas accepts both of the following instructions: insertps $192, %xmm0, %xmm1 insertps $-64, %xmm0, %xmm1 These will encode to the same byte sequence, with the immediate occupying an 8-bit entry. The first form was fixed by r136287 but that broke the prior handling of the second form! =[ Ironically, we would still emit the second form in some cases and then be unable to re-assemble the output. The reason why the first instruction failed to be handled is because prior to r136287 the operands were marked 'i32i8imm' which forces them to be sign-extendable. Clearly, that won't work for 192 in a single byte. However, making them zero-extended or "unsigned" doesn't really address the core issue either because it breaks negative immediates. The correct fix is to make these operands 'i8imm' reflecting that they can be either signed or unsigned but must be 8-bit immediates. This patch backs out r136287 and then changes those places as well as some others to use 'i8imm' rather than one of the extended variants. Naturally, this broke something else. The custom DAG nodes had to be updated to have a much more accurate type constraint of an i8 node, and a bunch of Pat immediates needed to be specified as i8 values. The fallout didn't end there though. We also then ceased to be able to match the instruction-specific intrinsics to the instructions so modified. Digging, this is because they too used i32 rather than i8 in their signature. So I've also switched those intrinsics to i8 arguments in line with the instructions.
In order to make the intrinsic adjustments of course, I also had to add auto upgrading for the intrinsics. I suspect that the intrinsic argument types may have led everything down this rabbit hole. Pretty happy with the result. llvm-svn: 217310
This commit is contained in:
parent
820d92269f
commit
5b09348e8e
@ -886,7 +886,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
// Vector insert
|
// Vector insert
|
||||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||||
def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">,
|
def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">,
|
||||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty],
|
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -896,13 +896,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_v16i8_ty],
|
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_v16i8_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
def int_x86_sse41_pblendw : GCCBuiltin<"__builtin_ia32_pblendw128">,
|
def int_x86_sse41_pblendw : GCCBuiltin<"__builtin_ia32_pblendw128">,
|
||||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty],
|
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
def int_x86_sse41_blendpd : GCCBuiltin<"__builtin_ia32_blendpd">,
|
def int_x86_sse41_blendpd : GCCBuiltin<"__builtin_ia32_blendpd">,
|
||||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty],
|
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
def int_x86_sse41_blendps : GCCBuiltin<"__builtin_ia32_blendps">,
|
def int_x86_sse41_blendps : GCCBuiltin<"__builtin_ia32_blendps">,
|
||||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty],
|
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">,
|
def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">,
|
||||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_v2f64_ty],
|
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_v2f64_ty],
|
||||||
@ -915,17 +915,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
// Vector dot product
|
// Vector dot product
|
||||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||||
def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">,
|
def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">,
|
||||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_i32_ty],
|
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||||
[IntrNoMem, Commutative]>;
|
[IntrNoMem, Commutative]>;
|
||||||
def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">,
|
def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">,
|
||||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty],
|
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||||
[IntrNoMem, Commutative]>;
|
[IntrNoMem, Commutative]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Vector sum of absolute differences
|
// Vector sum of absolute differences
|
||||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||||
def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
|
def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
|
||||||
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i32_ty],
|
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty],
|
||||||
[IntrNoMem, Commutative]>;
|
[IntrNoMem, Commutative]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1171,10 +1171,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||||
def int_x86_avx_blend_pd_256 : GCCBuiltin<"__builtin_ia32_blendpd256">,
|
def int_x86_avx_blend_pd_256 : GCCBuiltin<"__builtin_ia32_blendpd256">,
|
||||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
|
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
|
||||||
llvm_v4f64_ty, llvm_i32_ty], [IntrNoMem]>;
|
llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||||
def int_x86_avx_blend_ps_256 : GCCBuiltin<"__builtin_ia32_blendps256">,
|
def int_x86_avx_blend_ps_256 : GCCBuiltin<"__builtin_ia32_blendps256">,
|
||||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
|
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
|
||||||
llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
|
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||||
def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
|
def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
|
||||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
|
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
|
||||||
llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
|
llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
|
||||||
@ -1187,7 +1187,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||||
def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
|
def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
|
||||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
|
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
|
||||||
llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
|
llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Vector compare
|
// Vector compare
|
||||||
@ -1710,13 +1710,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
llvm_v32i8_ty], [IntrNoMem]>;
|
llvm_v32i8_ty], [IntrNoMem]>;
|
||||||
def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">,
|
def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">,
|
||||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
|
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
llvm_i8_ty], [IntrNoMem]>;
|
||||||
def int_x86_avx2_pblendd_128 : GCCBuiltin<"__builtin_ia32_pblendd128">,
|
def int_x86_avx2_pblendd_128 : GCCBuiltin<"__builtin_ia32_pblendd128">,
|
||||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
llvm_i8_ty], [IntrNoMem]>;
|
||||||
def int_x86_avx2_pblendd_256 : GCCBuiltin<"__builtin_ia32_pblendd256">,
|
def int_x86_avx2_pblendd_256 : GCCBuiltin<"__builtin_ia32_pblendd256">,
|
||||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
|
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
llvm_i8_ty], [IntrNoMem]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Vector load with broadcast
|
// Vector load with broadcast
|
||||||
@ -1955,7 +1955,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
llvm_v32i8_ty], [IntrNoMem]>;
|
llvm_v32i8_ty], [IntrNoMem]>;
|
||||||
def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
|
def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
|
||||||
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
|
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
|
||||||
llvm_i32_ty], [IntrNoMem, Commutative]>;
|
llvm_i8_ty], [IntrNoMem, Commutative]>;
|
||||||
def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
|
def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
|
||||||
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||||
}
|
}
|
||||||
|
@ -43,6 +43,22 @@ static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
|
||||||
|
// arguments have changed their type from i32 to i8.
|
||||||
|
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
|
||||||
|
Function *&NewFn) {
|
||||||
|
// Check that the last argument is an i32.
|
||||||
|
Type *LastArgType = F->getFunctionType()->getParamType(
|
||||||
|
F->getFunctionType()->getNumParams() - 1);
|
||||||
|
if (!LastArgType->isIntegerTy(32))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Move this function aside and map down.
|
||||||
|
F->setName(F->getName() + ".old");
|
||||||
|
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||||
assert(F && "Illegal to upgrade a non-existent Function.");
|
assert(F && "Illegal to upgrade a non-existent Function.");
|
||||||
|
|
||||||
@ -130,6 +146,51 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||||||
if (Name == "x86.sse41.ptestnzc")
|
if (Name == "x86.sse41.ptestnzc")
|
||||||
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
|
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
|
||||||
}
|
}
|
||||||
|
// Several blend and other instructions with masks used the wrong number of
|
||||||
|
// bits.
|
||||||
|
if (Name == "x86.sse41.pblendw")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_pblendw,
|
||||||
|
NewFn);
|
||||||
|
if (Name == "x86.sse41.blendpd")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendpd,
|
||||||
|
NewFn);
|
||||||
|
if (Name == "x86.sse41.blendps")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_blendps,
|
||||||
|
NewFn);
|
||||||
|
if (Name == "x86.sse41.insertps")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
|
||||||
|
NewFn);
|
||||||
|
if (Name == "x86.sse41.dppd")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
|
||||||
|
NewFn);
|
||||||
|
if (Name == "x86.sse41.dpps")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
|
||||||
|
NewFn);
|
||||||
|
if (Name == "x86.sse41.mpsadbw")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
|
||||||
|
NewFn);
|
||||||
|
if (Name == "x86.avx.blend.pd.256")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(
|
||||||
|
F, Intrinsic::x86_avx_blend_pd_256, NewFn);
|
||||||
|
if (Name == "x86.avx.blend.ps.256")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(
|
||||||
|
F, Intrinsic::x86_avx_blend_ps_256, NewFn);
|
||||||
|
if (Name == "x86.avx.dp.ps.256")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
|
||||||
|
NewFn);
|
||||||
|
if (Name == "x86.avx2.pblendw")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_pblendw,
|
||||||
|
NewFn);
|
||||||
|
if (Name == "x86.avx2.pblendd.128")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(
|
||||||
|
F, Intrinsic::x86_avx2_pblendd_128, NewFn);
|
||||||
|
if (Name == "x86.avx2.pblendd.256")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(
|
||||||
|
F, Intrinsic::x86_avx2_pblendd_256, NewFn);
|
||||||
|
if (Name == "x86.avx2.mpsadbw")
|
||||||
|
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
|
||||||
|
NewFn);
|
||||||
|
|
||||||
// frcz.ss/sd may need to have an argument dropped
|
// frcz.ss/sd may need to have an argument dropped
|
||||||
if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
|
if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
|
||||||
F->setName(Name + ".old");
|
F->setName(Name + ".old");
|
||||||
@ -413,6 +474,34 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||||||
CI->eraseFromParent();
|
CI->eraseFromParent();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case Intrinsic::x86_sse41_pblendw:
|
||||||
|
case Intrinsic::x86_sse41_blendpd:
|
||||||
|
case Intrinsic::x86_sse41_blendps:
|
||||||
|
case Intrinsic::x86_sse41_insertps:
|
||||||
|
case Intrinsic::x86_sse41_dppd:
|
||||||
|
case Intrinsic::x86_sse41_dpps:
|
||||||
|
case Intrinsic::x86_sse41_mpsadbw:
|
||||||
|
case Intrinsic::x86_avx_blend_pd_256:
|
||||||
|
case Intrinsic::x86_avx_blend_ps_256:
|
||||||
|
case Intrinsic::x86_avx_dp_ps_256:
|
||||||
|
case Intrinsic::x86_avx2_pblendw:
|
||||||
|
case Intrinsic::x86_avx2_pblendd_128:
|
||||||
|
case Intrinsic::x86_avx2_pblendd_256:
|
||||||
|
case Intrinsic::x86_avx2_mpsadbw: {
|
||||||
|
// Need to truncate the last argument from i32 to i8 -- this argument models
|
||||||
|
// an inherently 8-bit immediate operand to these x86 instructions.
|
||||||
|
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
|
||||||
|
CI->arg_operands().end());
|
||||||
|
|
||||||
|
// Replace the last argument with a trunc.
|
||||||
|
Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
|
||||||
|
|
||||||
|
CallInst *NewCall = Builder.CreateCall(NewFn, Args);
|
||||||
|
CI->replaceAllUsesWith(NewCall);
|
||||||
|
CI->eraseFromParent();
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,10 +24,6 @@ inline bool isImmSExti32i8Value(uint64_t Value) {
|
|||||||
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
|
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool isImmZExtu32u8Value(uint64_t Value) {
|
|
||||||
return (Value <= 0x00000000000000FFULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool isImmSExti64i8Value(uint64_t Value) {
|
inline bool isImmSExti64i8Value(uint64_t Value) {
|
||||||
return (( Value <= 0x000000000000007FULL)||
|
return (( Value <= 0x000000000000007FULL)||
|
||||||
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
|
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
|
||||||
|
@ -153,20 +153,6 @@ struct X86Operand : public MCParsedAsmOperand {
|
|||||||
// extension.
|
// extension.
|
||||||
return isImmSExti32i8Value(CE->getValue());
|
return isImmSExti32i8Value(CE->getValue());
|
||||||
}
|
}
|
||||||
bool isImmZExtu32u8() const {
|
|
||||||
if (!isImm())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// If this isn't a constant expr, just assume it fits and let relaxation
|
|
||||||
// handle it.
|
|
||||||
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
|
|
||||||
if (!CE)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// Otherwise, check the value is in a range that makes sense for this
|
|
||||||
// extension.
|
|
||||||
return isImmZExtu32u8Value(CE->getValue());
|
|
||||||
}
|
|
||||||
bool isImmSExti64i8() const {
|
bool isImmSExti64i8() const {
|
||||||
if (!isImm())
|
if (!isImm())
|
||||||
return false;
|
return false;
|
||||||
|
@ -393,12 +393,12 @@ def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
|
|||||||
|
|
||||||
// vinsertps - insert f32 to XMM
|
// vinsertps - insert f32 to XMM
|
||||||
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
|
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
|
||||||
(ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
|
(ins VR128X:$src1, VR128X:$src2, i8imm:$src3),
|
||||||
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||||
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
|
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
|
||||||
EVEX_4V;
|
EVEX_4V;
|
||||||
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
|
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
|
||||||
(ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
|
(ins VR128X:$src1, f32mem:$src2, i8imm:$src3),
|
||||||
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||||
[(set VR128X:$dst, (X86insertps VR128X:$src1,
|
[(set VR128X:$dst, (X86insertps VR128X:$src1,
|
||||||
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
|
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
|
||||||
@ -538,13 +538,13 @@ def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
|
|||||||
|
|
||||||
// vextractps - extract 32 bits from XMM
|
// vextractps - extract 32 bits from XMM
|
||||||
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
|
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
|
||||||
(ins VR128X:$src1, u32u8imm:$src2),
|
(ins VR128X:$src1, i32i8imm:$src2),
|
||||||
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
[(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
|
[(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
|
||||||
EVEX;
|
EVEX;
|
||||||
|
|
||||||
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
|
def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
|
||||||
(ins f32mem:$dst, VR128X:$src1, u32u8imm:$src2),
|
(ins f32mem:$dst, VR128X:$src1, i32i8imm:$src2),
|
||||||
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
|
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
|
||||||
addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
|
addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
|
||||||
|
@ -83,7 +83,7 @@ def X86pinsrw : SDNode<"X86ISD::PINSRW",
|
|||||||
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
|
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
|
||||||
def X86insertps : SDNode<"X86ISD::INSERTPS",
|
def X86insertps : SDNode<"X86ISD::INSERTPS",
|
||||||
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
|
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
|
||||||
SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
|
SDTCisVT<2, v4f32>, SDTCisVT<3, i8>]>>;
|
||||||
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
|
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
|
||||||
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
|
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
|
||||||
|
|
||||||
@ -197,7 +197,7 @@ def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
|
|||||||
def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
|
def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
|
||||||
|
|
||||||
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
|
||||||
SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
|
SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
|
||||||
|
|
||||||
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
|
def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
|
||||||
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
|
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
|
||||||
|
@ -551,11 +551,6 @@ class ImmSExtAsmOperandClass : AsmOperandClass {
|
|||||||
let RenderMethod = "addImmOperands";
|
let RenderMethod = "addImmOperands";
|
||||||
}
|
}
|
||||||
|
|
||||||
class ImmZExtAsmOperandClass : AsmOperandClass {
|
|
||||||
let SuperClasses = [ImmAsmOperand];
|
|
||||||
let RenderMethod = "addImmOperands";
|
|
||||||
}
|
|
||||||
|
|
||||||
def X86GR32orGR64AsmOperand : AsmOperandClass {
|
def X86GR32orGR64AsmOperand : AsmOperandClass {
|
||||||
let Name = "GR32orGR64";
|
let Name = "GR32orGR64";
|
||||||
}
|
}
|
||||||
@ -568,6 +563,7 @@ def AVX512RC : Operand<i32> {
|
|||||||
let PrintMethod = "printRoundingControl";
|
let PrintMethod = "printRoundingControl";
|
||||||
let OperandType = "OPERAND_IMMEDIATE";
|
let OperandType = "OPERAND_IMMEDIATE";
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sign-extended immediate classes. We don't need to define the full lattice
|
// Sign-extended immediate classes. We don't need to define the full lattice
|
||||||
// here because there is no instruction with an ambiguity between ImmSExti64i32
|
// here because there is no instruction with an ambiguity between ImmSExti64i32
|
||||||
// and ImmSExti32i8.
|
// and ImmSExti32i8.
|
||||||
@ -595,12 +591,6 @@ def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
|
|||||||
let Name = "ImmSExti32i8";
|
let Name = "ImmSExti32i8";
|
||||||
}
|
}
|
||||||
|
|
||||||
// [0, 0x000000FF]
|
|
||||||
def ImmZExtu32u8AsmOperand : ImmZExtAsmOperandClass {
|
|
||||||
let Name = "ImmZExtu32u8";
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// [0, 0x0000007F] |
|
// [0, 0x0000007F] |
|
||||||
// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
|
// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
|
||||||
def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
|
def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
|
||||||
@ -620,11 +610,6 @@ def i32i8imm : Operand<i32> {
|
|||||||
let ParserMatchClass = ImmSExti32i8AsmOperand;
|
let ParserMatchClass = ImmSExti32i8AsmOperand;
|
||||||
let OperandType = "OPERAND_IMMEDIATE";
|
let OperandType = "OPERAND_IMMEDIATE";
|
||||||
}
|
}
|
||||||
// 32-bits but only 8 bits are significant, and those 8 bits are unsigned.
|
|
||||||
def u32u8imm : Operand<i32> {
|
|
||||||
let ParserMatchClass = ImmZExtu32u8AsmOperand;
|
|
||||||
let OperandType = "OPERAND_IMMEDIATE";
|
|
||||||
}
|
|
||||||
|
|
||||||
// 64-bits but only 32 bits are significant.
|
// 64-bits but only 32 bits are significant.
|
||||||
def i64i32imm : Operand<i64> {
|
def i64i32imm : Operand<i64> {
|
||||||
|
@ -5379,7 +5379,7 @@ let Predicates = [HasAVX] in {
|
|||||||
// the corresponding elements in the second input vector.
|
// the corresponding elements in the second input vector.
|
||||||
|
|
||||||
def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)),
|
def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)),
|
||||||
(v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i32 170))),
|
(v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i8 170))),
|
||||||
(VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
|
(VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
|
||||||
|
|
||||||
// Constant 10 corresponds to the binary mask '1010'.
|
// Constant 10 corresponds to the binary mask '1010'.
|
||||||
@ -5388,16 +5388,16 @@ let Predicates = [HasAVX] in {
|
|||||||
// - the 2nd and 4th element from the second input vector (the 'fadd' node).
|
// - the 2nd and 4th element from the second input vector (the 'fadd' node).
|
||||||
|
|
||||||
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
|
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
|
||||||
(v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
|
(v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
|
||||||
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
|
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
|
||||||
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
|
def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
|
||||||
(v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
|
(v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i8 10))),
|
||||||
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
|
(VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
|
||||||
def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
|
def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
|
||||||
(v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
|
(v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i8 10))),
|
||||||
(VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
|
(VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
|
||||||
def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
|
def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
|
||||||
(v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
|
(v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i8 2))),
|
||||||
(VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
|
(VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
|
||||||
def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
|
def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
|
||||||
(v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
|
(v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
|
||||||
@ -5411,11 +5411,11 @@ let Predicates = [UseSSE3] in {
|
|||||||
// - the 2nd and 4th element from the second input vector (the fadd node).
|
// - the 2nd and 4th element from the second input vector (the fadd node).
|
||||||
|
|
||||||
def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
|
def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
|
||||||
(v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
|
(v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i8 10))),
|
||||||
(ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
|
(ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
|
||||||
|
|
||||||
def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
|
def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
|
||||||
(v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
|
(v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i8 2))),
|
||||||
(ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
|
(ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
|
||||||
def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
|
def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
|
||||||
(v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
|
(v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
|
||||||
@ -6705,7 +6705,7 @@ let Constraints = "$src1 = $dst" in
|
|||||||
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
|
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
|
||||||
OpndItins itins = DEFAULT_ITINS> {
|
OpndItins itins = DEFAULT_ITINS> {
|
||||||
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
|
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||||
!if(Is2Addr,
|
!if(Is2Addr,
|
||||||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
!strconcat(asm,
|
!strconcat(asm,
|
||||||
@ -6714,7 +6714,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
|
|||||||
(X86insertps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
|
(X86insertps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
|
||||||
Sched<[WriteFShuffle]>;
|
Sched<[WriteFShuffle]>;
|
||||||
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
|
(ins VR128:$src1, f32mem:$src2, i8imm:$src3),
|
||||||
!if(Is2Addr,
|
!if(Is2Addr,
|
||||||
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
!strconcat(asm,
|
!strconcat(asm,
|
||||||
@ -7350,7 +7350,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
|
|||||||
OpndItins itins = DEFAULT_ITINS> {
|
OpndItins itins = DEFAULT_ITINS> {
|
||||||
let isCommutable = 1 in
|
let isCommutable = 1 in
|
||||||
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
|
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
|
||||||
(ins RC:$src1, RC:$src2, u32u8imm:$src3),
|
(ins RC:$src1, RC:$src2, i8imm:$src3),
|
||||||
!if(Is2Addr,
|
!if(Is2Addr,
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
@ -7359,7 +7359,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
|
|||||||
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
|
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
|
||||||
Sched<[itins.Sched]>;
|
Sched<[itins.Sched]>;
|
||||||
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
|
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
|
||||||
(ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
|
(ins RC:$src1, x86memop:$src2, i8imm:$src3),
|
||||||
!if(Is2Addr,
|
!if(Is2Addr,
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
@ -8579,13 +8579,13 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
|
|||||||
X86MemOperand x86memop> {
|
X86MemOperand x86memop> {
|
||||||
let isCommutable = 1 in
|
let isCommutable = 1 in
|
||||||
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
|
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
|
||||||
(ins RC:$src1, RC:$src2, u32u8imm:$src3),
|
(ins RC:$src1, RC:$src2, i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||||
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
|
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
|
||||||
Sched<[WriteBlend]>, VEX_4V;
|
Sched<[WriteBlend]>, VEX_4V;
|
||||||
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
|
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
|
||||||
(ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
|
(ins RC:$src1, x86memop:$src2, i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||||
[(set RC:$dst,
|
[(set RC:$dst,
|
||||||
|
26
test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
Normal file
26
test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
|
||||||
|
|
||||||
|
define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
|
||||||
|
; CHECK: vblendpd
|
||||||
|
%res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
|
||||||
|
ret <4 x double> %res
|
||||||
|
}
|
||||||
|
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
|
||||||
|
; CHECK: vblendps
|
||||||
|
%res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
|
||||||
|
ret <8 x float> %res
|
||||||
|
}
|
||||||
|
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
|
||||||
|
; CHECK: vdpps
|
||||||
|
%res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
|
||||||
|
ret <8 x float> %res
|
||||||
|
}
|
||||||
|
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
@ -818,18 +818,18 @@ declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
|
|||||||
|
|
||||||
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
|
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
|
||||||
; CHECK: vblendpd
|
; CHECK: vblendpd
|
||||||
%res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
|
%res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
|
||||||
ret <2 x double> %res
|
ret <2 x double> %res
|
||||||
}
|
}
|
||||||
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
|
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
|
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
|
||||||
; CHECK: vblendps
|
; CHECK: vblendps
|
||||||
%res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
|
%res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
|
||||||
ret <4 x float> %res
|
ret <4 x float> %res
|
||||||
}
|
}
|
||||||
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
|
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
|
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
|
||||||
@ -850,35 +850,35 @@ declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x floa
|
|||||||
|
|
||||||
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
|
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
|
||||||
; CHECK: vdppd
|
; CHECK: vdppd
|
||||||
%res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
|
%res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
|
||||||
ret <2 x double> %res
|
ret <2 x double> %res
|
||||||
}
|
}
|
||||||
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
|
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
|
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
|
||||||
; CHECK: vdpps
|
; CHECK: vdpps
|
||||||
%res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
|
%res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
|
||||||
ret <4 x float> %res
|
ret <4 x float> %res
|
||||||
}
|
}
|
||||||
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
|
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
|
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
|
||||||
; CHECK: vinsertps
|
; CHECK: vinsertps
|
||||||
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
|
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
|
||||||
ret <4 x float> %res
|
ret <4 x float> %res
|
||||||
}
|
}
|
||||||
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
|
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
|
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
|
||||||
; CHECK: vmpsadbw
|
; CHECK: vmpsadbw
|
||||||
%res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
|
%res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
|
||||||
ret <8 x i16> %res
|
ret <8 x i16> %res
|
||||||
}
|
}
|
||||||
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
|
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
|
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
|
||||||
@ -899,10 +899,10 @@ declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) noun
|
|||||||
|
|
||||||
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||||
; CHECK: vpblendw
|
; CHECK: vpblendw
|
||||||
%res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
|
%res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
|
||||||
ret <8 x i16> %res
|
ret <8 x i16> %res
|
||||||
}
|
}
|
||||||
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
|
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
|
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
|
||||||
@ -1770,18 +1770,18 @@ declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwi
|
|||||||
|
|
||||||
define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
|
define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
|
||||||
; CHECK: vblendpd
|
; CHECK: vblendpd
|
||||||
%res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
|
%res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
|
||||||
ret <4 x double> %res
|
ret <4 x double> %res
|
||||||
}
|
}
|
||||||
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
|
declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
|
define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
|
||||||
; CHECK: vblendps
|
; CHECK: vblendps
|
||||||
%res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
|
%res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
|
||||||
ret <8 x float> %res
|
ret <8 x float> %res
|
||||||
}
|
}
|
||||||
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
|
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
|
||||||
@ -1950,10 +1950,10 @@ declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
|
|||||||
|
|
||||||
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
|
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
|
||||||
; CHECK: vdpps
|
; CHECK: vdpps
|
||||||
%res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
|
%res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
|
||||||
ret <8 x float> %res
|
ret <8 x float> %res
|
||||||
}
|
}
|
||||||
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
|
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
|
define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
|
||||||
|
@ -60,7 +60,7 @@ define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x floa
|
|||||||
; X32: movl 8(%esp), %ecx
|
; X32: movl 8(%esp), %ecx
|
||||||
; CHECK-NOT: mov
|
; CHECK-NOT: mov
|
||||||
;; Try to match a bit more of the instr, since we need the load's offset.
|
;; Try to match a bit more of the instr, since we need the load's offset.
|
||||||
; CHECK: vinsertps $192, 12(%{{...}},%{{...}}), %
|
; CHECK: vinsertps $-64, 12(%{{...}},%{{...}}), %
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%1 = getelementptr inbounds <4 x float>* %pb, i64 %index
|
%1 = getelementptr inbounds <4 x float>* %pb, i64 %index
|
||||||
%2 = load <4 x float>* %1, align 16
|
%2 = load <4 x float>* %1, align 16
|
||||||
|
33
test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
Normal file
33
test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
|
||||||
|
|
||||||
|
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
|
||||||
|
; CHECK: vpblendw
|
||||||
|
%res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
|
||||||
|
ret <16 x i16> %res
|
||||||
|
}
|
||||||
|
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||||
|
; CHECK: vpblendd
|
||||||
|
%res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
|
||||||
|
ret <4 x i32> %res
|
||||||
|
}
|
||||||
|
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
|
||||||
|
; CHECK: vpblendd
|
||||||
|
%res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
|
||||||
|
ret <8 x i32> %res
|
||||||
|
}
|
||||||
|
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
|
||||||
|
; CHECK: vmpsadbw
|
||||||
|
%res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
|
||||||
|
ret <16 x i16> %res
|
||||||
|
}
|
||||||
|
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
|
||||||
|
|
@ -475,10 +475,10 @@ declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
|
|||||||
|
|
||||||
define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
|
define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
|
||||||
; CHECK: vmpsadbw
|
; CHECK: vmpsadbw
|
||||||
%res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
|
%res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
|
||||||
ret <16 x i16> %res
|
ret <16 x i16> %res
|
||||||
}
|
}
|
||||||
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
|
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
|
define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
|
||||||
@ -499,10 +499,10 @@ declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounw
|
|||||||
|
|
||||||
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
|
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
|
||||||
; CHECK: vpblendw
|
; CHECK: vpblendw
|
||||||
%res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
|
%res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1]
|
||||||
ret <16 x i16> %res
|
ret <16 x i16> %res
|
||||||
}
|
}
|
||||||
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
|
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
|
define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
|
||||||
@ -706,18 +706,18 @@ declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind re
|
|||||||
|
|
||||||
define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
|
define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||||
; CHECK: vpblendd
|
; CHECK: vpblendd
|
||||||
%res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
|
%res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
|
||||||
ret <4 x i32> %res
|
ret <4 x i32> %res
|
||||||
}
|
}
|
||||||
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
|
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
|
define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
|
||||||
; CHECK: vpblendd
|
; CHECK: vpblendd
|
||||||
%res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
|
%res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
|
||||||
ret <8 x i32> %res
|
ret <8 x i32> %res
|
||||||
}
|
}
|
||||||
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
|
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
|
define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
|
||||||
|
61
test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
Normal file
61
test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s
|
||||||
|
; This test works just like the non-upgrade one except that it only checks
|
||||||
|
; forms which require auto-upgrading.
|
||||||
|
|
||||||
|
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
|
||||||
|
; CHECK: blendpd
|
||||||
|
%res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
|
||||||
|
ret <2 x double> %res
|
||||||
|
}
|
||||||
|
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
|
||||||
|
; CHECK: blendps
|
||||||
|
%res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
|
||||||
|
; CHECK: dppd
|
||||||
|
%res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
|
||||||
|
ret <2 x double> %res
|
||||||
|
}
|
||||||
|
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
|
||||||
|
; CHECK: dpps
|
||||||
|
%res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
|
||||||
|
; CHECK: insertps
|
||||||
|
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
|
||||||
|
ret <4 x float> %res
|
||||||
|
}
|
||||||
|
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
|
||||||
|
; CHECK: mpsadbw
|
||||||
|
%res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
|
||||||
|
ret <8 x i16> %res
|
||||||
|
}
|
||||||
|
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||||
|
; CHECK: pblendw
|
||||||
|
%res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
|
||||||
|
ret <8 x i16> %res
|
||||||
|
}
|
||||||
|
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
@ -2,18 +2,18 @@
|
|||||||
|
|
||||||
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
|
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
|
||||||
; CHECK: blendpd
|
; CHECK: blendpd
|
||||||
%res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
|
%res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
|
||||||
ret <2 x double> %res
|
ret <2 x double> %res
|
||||||
}
|
}
|
||||||
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
|
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
|
define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
|
||||||
; CHECK: blendps
|
; CHECK: blendps
|
||||||
%res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
|
%res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
|
||||||
ret <4 x float> %res
|
ret <4 x float> %res
|
||||||
}
|
}
|
||||||
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
|
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
|
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
|
||||||
@ -34,35 +34,35 @@ declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x floa
|
|||||||
|
|
||||||
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
|
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
|
||||||
; CHECK: dppd
|
; CHECK: dppd
|
||||||
%res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
|
%res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
|
||||||
ret <2 x double> %res
|
ret <2 x double> %res
|
||||||
}
|
}
|
||||||
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
|
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
|
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
|
||||||
; CHECK: dpps
|
; CHECK: dpps
|
||||||
%res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
|
%res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
|
||||||
ret <4 x float> %res
|
ret <4 x float> %res
|
||||||
}
|
}
|
||||||
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
|
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
|
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
|
||||||
; CHECK: insertps
|
; CHECK: insertps
|
||||||
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
|
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
|
||||||
ret <4 x float> %res
|
ret <4 x float> %res
|
||||||
}
|
}
|
||||||
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
|
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
|
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
|
||||||
; CHECK: mpsadbw
|
; CHECK: mpsadbw
|
||||||
%res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
|
%res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
|
||||||
ret <8 x i16> %res
|
ret <8 x i16> %res
|
||||||
}
|
}
|
||||||
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
|
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
|
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
|
||||||
@ -83,10 +83,10 @@ declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) noun
|
|||||||
|
|
||||||
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
|
||||||
; CHECK: pblendw
|
; CHECK: pblendw
|
||||||
%res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
|
%res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
|
||||||
ret <8 x i16> %res
|
ret <8 x i16> %res
|
||||||
}
|
}
|
||||||
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
|
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
|
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
|
||||||
|
@ -618,7 +618,7 @@ define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x floa
|
|||||||
; X32: movl 8(%esp), %ecx
|
; X32: movl 8(%esp), %ecx
|
||||||
; CHECK-NOT: mov
|
; CHECK-NOT: mov
|
||||||
;; Try to match a bit more of the instr, since we need the load's offset.
|
;; Try to match a bit more of the instr, since we need the load's offset.
|
||||||
; CHECK: insertps $192, 12(%{{...}},%{{...}}), %
|
; CHECK: insertps $-64, 12(%{{...}},%{{...}}), %
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
%1 = getelementptr inbounds <4 x float>* %pb, i64 %index
|
%1 = getelementptr inbounds <4 x float>* %pb, i64 %index
|
||||||
%2 = load <4 x float>* %1, align 16
|
%2 = load <4 x float>* %1, align 16
|
||||||
|
@ -19618,22 +19618,36 @@
|
|||||||
// CHECK: blendvps %xmm2, %xmm1 # encoding: [0x66,0x0f,0x38,0x14,0xca]
|
// CHECK: blendvps %xmm2, %xmm1 # encoding: [0x66,0x0f,0x38,0x14,0xca]
|
||||||
blendvps %xmm2, %xmm1
|
blendvps %xmm2, %xmm1
|
||||||
|
|
||||||
// rdar://9795008
|
// These instructions can take an unsigned 8-bit mask as well as a signed 8-bit
|
||||||
// These instructions take a mask not an 8-bit sign extended value.
|
// immediate. Check both forms here.
|
||||||
// CHECK: blendps $129, %xmm2, %xmm1
|
// CHECK: blendps $129, %xmm2, %xmm1
|
||||||
blendps $0x81, %xmm2, %xmm1
|
blendps $0x81, %xmm2, %xmm1
|
||||||
|
// CHECK: blendps $-64, %xmm2, %xmm1
|
||||||
|
blendps $-64, %xmm2, %xmm1
|
||||||
// CHECK: blendpd $129, %xmm2, %xmm1
|
// CHECK: blendpd $129, %xmm2, %xmm1
|
||||||
blendpd $0x81, %xmm2, %xmm1
|
blendpd $0x81, %xmm2, %xmm1
|
||||||
|
// CHECK: blendpd $-64, %xmm2, %xmm1
|
||||||
|
blendpd $-64, %xmm2, %xmm1
|
||||||
// CHECK: pblendw $129, %xmm2, %xmm1
|
// CHECK: pblendw $129, %xmm2, %xmm1
|
||||||
pblendw $0x81, %xmm2, %xmm1
|
pblendw $0x81, %xmm2, %xmm1
|
||||||
|
// CHECK: pblendw $-64, %xmm2, %xmm1
|
||||||
|
pblendw $-64, %xmm2, %xmm1
|
||||||
// CHECK: mpsadbw $129, %xmm2, %xmm1
|
// CHECK: mpsadbw $129, %xmm2, %xmm1
|
||||||
mpsadbw $0x81, %xmm2, %xmm1
|
mpsadbw $0x81, %xmm2, %xmm1
|
||||||
|
// CHECK: mpsadbw $-64, %xmm2, %xmm1
|
||||||
|
mpsadbw $-64, %xmm2, %xmm1
|
||||||
// CHECK: dpps $129, %xmm2, %xmm1
|
// CHECK: dpps $129, %xmm2, %xmm1
|
||||||
dpps $0x81, %xmm2, %xmm1
|
dpps $0x81, %xmm2, %xmm1
|
||||||
|
// CHECK: dpps $-64, %xmm2, %xmm1
|
||||||
|
dpps $-64, %xmm2, %xmm1
|
||||||
// CHECK: dppd $129, %xmm2, %xmm1
|
// CHECK: dppd $129, %xmm2, %xmm1
|
||||||
dppd $0x81, %xmm2, %xmm1
|
dppd $0x81, %xmm2, %xmm1
|
||||||
|
// CHECK: dppd $-64, %xmm2, %xmm1
|
||||||
|
dppd $-64, %xmm2, %xmm1
|
||||||
// CHECK: insertps $129, %xmm2, %xmm1
|
// CHECK: insertps $129, %xmm2, %xmm1
|
||||||
insertps $0x81, %xmm2, %xmm1
|
insertps $0x81, %xmm2, %xmm1
|
||||||
|
// CHECK: insertps $-64, %xmm2, %xmm1
|
||||||
|
insertps $-64, %xmm2, %xmm1
|
||||||
|
|
||||||
// PR13253 handle implicit optional third argument that must always be xmm0
|
// PR13253 handle implicit optional third argument that must always be xmm0
|
||||||
// CHECK: pblendvb %xmm2, %xmm1
|
// CHECK: pblendvb %xmm2, %xmm1
|
||||||
|
@ -912,7 +912,6 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
|
|||||||
TYPE("i32mem", TYPE_Mv)
|
TYPE("i32mem", TYPE_Mv)
|
||||||
TYPE("i32imm", TYPE_IMMv)
|
TYPE("i32imm", TYPE_IMMv)
|
||||||
TYPE("i32i8imm", TYPE_IMM32)
|
TYPE("i32i8imm", TYPE_IMM32)
|
||||||
TYPE("u32u8imm", TYPE_IMM32)
|
|
||||||
TYPE("GR32", TYPE_R32)
|
TYPE("GR32", TYPE_R32)
|
||||||
TYPE("GR32orGR64", TYPE_R32)
|
TYPE("GR32orGR64", TYPE_R32)
|
||||||
TYPE("i64mem", TYPE_Mv)
|
TYPE("i64mem", TYPE_Mv)
|
||||||
@ -1015,7 +1014,6 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s,
|
|||||||
ENCODING("i16imm", ENCODING_IW)
|
ENCODING("i16imm", ENCODING_IW)
|
||||||
}
|
}
|
||||||
ENCODING("i32i8imm", ENCODING_IB)
|
ENCODING("i32i8imm", ENCODING_IB)
|
||||||
ENCODING("u32u8imm", ENCODING_IB)
|
|
||||||
ENCODING("SSECC", ENCODING_IB)
|
ENCODING("SSECC", ENCODING_IB)
|
||||||
ENCODING("AVXCC", ENCODING_IB)
|
ENCODING("AVXCC", ENCODING_IB)
|
||||||
ENCODING("AVX512RC", ENCODING_IB)
|
ENCODING("AVX512RC", ENCODING_IB)
|
||||||
|
Loading…
Reference in New Issue
Block a user