mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
[X86] Remove the llvm.x86.sse2.storel.dq intrinsic. It hasn't been used in a long time.
llvm-svn: 270677
This commit is contained in:
parent
1352c2ef4f
commit
4710ab1424
@ -537,9 +537,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
def int_x86_sse2_storeu_dq : GCCBuiltin<"__builtin_ia32_storedqu">,
|
def int_x86_sse2_storeu_dq : GCCBuiltin<"__builtin_ia32_storedqu">,
|
||||||
Intrinsic<[], [llvm_ptr_ty,
|
Intrinsic<[], [llvm_ptr_ty,
|
||||||
llvm_v16i8_ty], [IntrArgMemOnly]>;
|
llvm_v16i8_ty], [IntrArgMemOnly]>;
|
||||||
def int_x86_sse2_storel_dq : GCCBuiltin<"__builtin_ia32_storelv4si">,
|
|
||||||
Intrinsic<[], [llvm_ptr_ty,
|
|
||||||
llvm_v4i32_ty], [IntrArgMemOnly]>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Misc.
|
// Misc.
|
||||||
|
@ -189,6 +189,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||||||
Name == "x86.avx.movnt.dq.256" ||
|
Name == "x86.avx.movnt.dq.256" ||
|
||||||
Name == "x86.avx.movnt.pd.256" ||
|
Name == "x86.avx.movnt.pd.256" ||
|
||||||
Name == "x86.avx.movnt.ps.256" ||
|
Name == "x86.avx.movnt.ps.256" ||
|
||||||
|
Name == "x86.sse2.storel.dq" ||
|
||||||
Name == "x86.sse42.crc32.64.8" ||
|
Name == "x86.sse42.crc32.64.8" ||
|
||||||
Name == "x86.avx.vbroadcast.ss" ||
|
Name == "x86.avx.vbroadcast.ss" ||
|
||||||
Name == "x86.avx.vbroadcast.ss.256" ||
|
Name == "x86.avx.vbroadcast.ss.256" ||
|
||||||
@ -419,6 +420,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||||||
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
|
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
|
||||||
SI->setAlignment(32);
|
SI->setAlignment(32);
|
||||||
|
|
||||||
|
// Remove intrinsic.
|
||||||
|
CI->eraseFromParent();
|
||||||
|
return;
|
||||||
|
} else if (Name == "llvm.x86.sse2.storel.dq") {
|
||||||
|
IRBuilder<> Builder(C);
|
||||||
|
Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
|
||||||
|
|
||||||
|
Value *Arg0 = CI->getArgOperand(0);
|
||||||
|
Value *Arg1 = CI->getArgOperand(1);
|
||||||
|
|
||||||
|
Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
|
||||||
|
Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
|
||||||
|
Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
|
||||||
|
Value *BC = Builder.CreateBitCast(Arg0,
|
||||||
|
PointerType::getUnqual(Elt->getType()),
|
||||||
|
"cast");
|
||||||
|
StoreInst *SI = Builder.CreateStore(Elt, BC);
|
||||||
|
SI->setAlignment(1);
|
||||||
|
|
||||||
// Remove intrinsic.
|
// Remove intrinsic.
|
||||||
CI->eraseFromParent();
|
CI->eraseFromParent();
|
||||||
return;
|
return;
|
||||||
|
@ -5022,13 +5022,6 @@ def : InstAlias<"vmovq\t{$src, $dst|$dst, $src}",
|
|||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
// Store / copy lower 64-bits of a XMM register.
|
// Store / copy lower 64-bits of a XMM register.
|
||||||
//
|
//
|
||||||
let Predicates = [HasAVX] in
|
|
||||||
def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
|
|
||||||
(VMOVPQI2QImr addr:$dst, VR128:$src)>;
|
|
||||||
let Predicates = [UseSSE2] in
|
|
||||||
def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
|
|
||||||
(MOVPQI2QImr addr:$dst, VR128:$src)>;
|
|
||||||
|
|
||||||
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, AddedComplexity = 20 in {
|
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, AddedComplexity = 20 in {
|
||||||
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
||||||
"vmovq\t{$src, $dst|$dst, $src}",
|
"vmovq\t{$src, $dst|$dst, $src}",
|
||||||
|
@ -687,7 +687,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
|
|||||||
case Intrinsic::x86_sse_storeu_ps:
|
case Intrinsic::x86_sse_storeu_ps:
|
||||||
case Intrinsic::x86_sse2_storeu_pd:
|
case Intrinsic::x86_sse2_storeu_pd:
|
||||||
case Intrinsic::x86_sse2_storeu_dq:
|
case Intrinsic::x86_sse2_storeu_dq:
|
||||||
case Intrinsic::x86_sse2_storel_dq:
|
|
||||||
if (II->getArgOperand(0) == OperandVal)
|
if (II->getArgOperand(0) == OperandVal)
|
||||||
isAddress = true;
|
isAddress = true;
|
||||||
break;
|
break;
|
||||||
@ -712,7 +711,6 @@ static MemAccessTy getAccessType(const Instruction *Inst) {
|
|||||||
case Intrinsic::x86_sse_storeu_ps:
|
case Intrinsic::x86_sse_storeu_ps:
|
||||||
case Intrinsic::x86_sse2_storeu_pd:
|
case Intrinsic::x86_sse2_storeu_pd:
|
||||||
case Intrinsic::x86_sse2_storeu_dq:
|
case Intrinsic::x86_sse2_storeu_dq:
|
||||||
case Intrinsic::x86_sse2_storel_dq:
|
|
||||||
AccessTy.MemTy = II->getArgOperand(0)->getType();
|
AccessTy.MemTy = II->getArgOperand(0)->getType();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
|
||||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx,aes,pclmul | FileCheck %s --check-prefix=AVX
|
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx,aes,pclmul | FileCheck %s --check-prefix=AVX
|
||||||
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx512vl,aes,pclmul | FileCheck %s --check-prefix=AVX512VL
|
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx512vl,aes,pclmul | FileCheck %s --check-prefix=AVX512VL
|
||||||
|
|
||||||
@ -1253,37 +1253,19 @@ define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
|
|||||||
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
|
|
||||||
; AVX-LABEL: test_x86_sse2_storel_dq:
|
|
||||||
; AVX: ## BB#0:
|
|
||||||
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
; AVX-NEXT: vmovlps %xmm0, (%eax)
|
|
||||||
; AVX-NEXT: retl
|
|
||||||
;
|
|
||||||
; AVX512VL-LABEL: test_x86_sse2_storel_dq:
|
|
||||||
; AVX512VL: ## BB#0:
|
|
||||||
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
; AVX512VL-NEXT: vmovlps %xmm0, (%eax)
|
|
||||||
; AVX512VL-NEXT: retl
|
|
||||||
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
|
|
||||||
|
|
||||||
|
|
||||||
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
|
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
|
||||||
; add operation forces the execution domain.
|
; add operation forces the execution domain.
|
||||||
; AVX-LABEL: test_x86_sse2_storeu_dq:
|
; AVX-LABEL: test_x86_sse2_storeu_dq:
|
||||||
; AVX: ## BB#0:
|
; AVX: ## BB#0:
|
||||||
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX-NEXT: vpaddb LCPI77_0, %xmm0, %xmm0
|
; AVX-NEXT: vpaddb LCPI76_0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: vmovdqu %xmm0, (%eax)
|
; AVX-NEXT: vmovdqu %xmm0, (%eax)
|
||||||
; AVX-NEXT: retl
|
; AVX-NEXT: retl
|
||||||
;
|
;
|
||||||
; AVX512VL-LABEL: test_x86_sse2_storeu_dq:
|
; AVX512VL-LABEL: test_x86_sse2_storeu_dq:
|
||||||
; AVX512VL: ## BB#0:
|
; AVX512VL: ## BB#0:
|
||||||
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512VL-NEXT: vpaddb LCPI77_0, %xmm0, %xmm0
|
; AVX512VL-NEXT: vpaddb LCPI76_0, %xmm0, %xmm0
|
||||||
; AVX512VL-NEXT: vmovdqu %xmm0, (%eax)
|
; AVX512VL-NEXT: vmovdqu %xmm0, (%eax)
|
||||||
; AVX512VL-NEXT: retl
|
; AVX512VL-NEXT: retl
|
||||||
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||||
@ -4208,7 +4190,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
|
|||||||
; AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
|
; AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
|
||||||
; AVX512VL: ## BB#0:
|
; AVX512VL: ## BB#0:
|
||||||
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512VL-NEXT: vpaddb LCPI236_0, %ymm0, %ymm0
|
; AVX512VL-NEXT: vpaddb LCPI235_0, %ymm0, %ymm0
|
||||||
; AVX512VL-NEXT: vmovdqu %ymm0, (%eax)
|
; AVX512VL-NEXT: vmovdqu %ymm0, (%eax)
|
||||||
; AVX512VL-NEXT: retl
|
; AVX512VL-NEXT: retl
|
||||||
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||||
@ -4449,7 +4431,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
|
|||||||
;
|
;
|
||||||
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
|
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
|
||||||
; AVX512VL: ## BB#0:
|
; AVX512VL: ## BB#0:
|
||||||
; AVX512VL-NEXT: vpermilpd LCPI250_0, %ymm0, %ymm0
|
; AVX512VL-NEXT: vpermilpd LCPI249_0, %ymm0, %ymm0
|
||||||
; AVX512VL-NEXT: retl
|
; AVX512VL-NEXT: retl
|
||||||
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
|
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
|
||||||
ret <4 x double> %res
|
ret <4 x double> %res
|
||||||
@ -4941,7 +4923,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
|
|||||||
; AVX-LABEL: movnt_dq:
|
; AVX-LABEL: movnt_dq:
|
||||||
; AVX: ## BB#0:
|
; AVX: ## BB#0:
|
||||||
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX-NEXT: vpaddq LCPI277_0, %xmm0, %xmm0
|
; AVX-NEXT: vpaddq LCPI276_0, %xmm0, %xmm0
|
||||||
; AVX-NEXT: vmovntdq %ymm0, (%eax)
|
; AVX-NEXT: vmovntdq %ymm0, (%eax)
|
||||||
; AVX-NEXT: vzeroupper
|
; AVX-NEXT: vzeroupper
|
||||||
; AVX-NEXT: retl
|
; AVX-NEXT: retl
|
||||||
@ -4949,7 +4931,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
|
|||||||
; AVX512VL-LABEL: movnt_dq:
|
; AVX512VL-LABEL: movnt_dq:
|
||||||
; AVX512VL: ## BB#0:
|
; AVX512VL: ## BB#0:
|
||||||
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; AVX512VL-NEXT: vpaddq LCPI277_0, %xmm0, %xmm0
|
; AVX512VL-NEXT: vpaddq LCPI276_0, %xmm0, %xmm0
|
||||||
; AVX512VL-NEXT: vmovntdq %ymm0, (%eax)
|
; AVX512VL-NEXT: vmovntdq %ymm0, (%eax)
|
||||||
; AVX512VL-NEXT: retl
|
; AVX512VL-NEXT: retl
|
||||||
%a2 = add <2 x i64> %a1, <i64 1, i64 1>
|
%a2 = add <2 x i64> %a1, <i64 1, i64 1>
|
||||||
|
@ -42,3 +42,18 @@ define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
|
|||||||
ret <2 x i64> %res
|
ret <2 x i64> %res
|
||||||
}
|
}
|
||||||
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
|
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
|
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
|
||||||
|
; CHECK-LABEL: test_x86_sse2_storel_dq:
|
||||||
|
; CHECK: ## BB#0:
|
||||||
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
; CHECK-NEXT: movlps %xmm0, (%eax)
|
||||||
|
; CHECK-NEXT: retl
|
||||||
|
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
|
||||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE
|
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE
|
||||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
|
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
|
||||||
|
|
||||||
@ -1157,37 +1157,19 @@ define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
|
|||||||
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
|
|
||||||
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
|
|
||||||
; SSE-LABEL: test_x86_sse2_storel_dq:
|
|
||||||
; SSE: ## BB#0:
|
|
||||||
; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
; SSE-NEXT: movlps %xmm0, (%eax)
|
|
||||||
; SSE-NEXT: retl
|
|
||||||
;
|
|
||||||
; KNL-LABEL: test_x86_sse2_storel_dq:
|
|
||||||
; KNL: ## BB#0:
|
|
||||||
; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
; KNL-NEXT: vmovlps %xmm0, (%eax)
|
|
||||||
; KNL-NEXT: retl
|
|
||||||
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
|
|
||||||
|
|
||||||
|
|
||||||
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
|
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
|
||||||
; add operation forces the execution domain.
|
; add operation forces the execution domain.
|
||||||
; SSE-LABEL: test_x86_sse2_storeu_dq:
|
; SSE-LABEL: test_x86_sse2_storeu_dq:
|
||||||
; SSE: ## BB#0:
|
; SSE: ## BB#0:
|
||||||
; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; SSE-NEXT: paddb LCPI71_0, %xmm0
|
; SSE-NEXT: paddb LCPI70_0, %xmm0
|
||||||
; SSE-NEXT: movdqu %xmm0, (%eax)
|
; SSE-NEXT: movdqu %xmm0, (%eax)
|
||||||
; SSE-NEXT: retl
|
; SSE-NEXT: retl
|
||||||
;
|
;
|
||||||
; KNL-LABEL: test_x86_sse2_storeu_dq:
|
; KNL-LABEL: test_x86_sse2_storeu_dq:
|
||||||
; KNL: ## BB#0:
|
; KNL: ## BB#0:
|
||||||
; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; KNL-NEXT: vpaddb LCPI71_0, %xmm0, %xmm0
|
; KNL-NEXT: vpaddb LCPI70_0, %xmm0, %xmm0
|
||||||
; KNL-NEXT: vmovdqu %xmm0, (%eax)
|
; KNL-NEXT: vmovdqu %xmm0, (%eax)
|
||||||
; KNL-NEXT: retl
|
; KNL-NEXT: retl
|
||||||
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||||
|
@ -1,13 +0,0 @@
|
|||||||
; RUN: opt < %s -instcombine -S | not grep "store "
|
|
||||||
; PR2296
|
|
||||||
|
|
||||||
@G = common global double 0.000000e+00, align 16
|
|
||||||
|
|
||||||
define void @x(<2 x i64> %y) nounwind {
|
|
||||||
entry:
|
|
||||||
bitcast <2 x i64> %y to <4 x i32>
|
|
||||||
call void @llvm.x86.sse2.storel.dq( i8* bitcast (double* @G to i8*), <4 x i32> %0 ) nounwind
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
|
|
Loading…
Reference in New Issue
Block a user