
[InstCombine] Avoid introduction of unaligned mem access

InstCombine is able to transform a mem transfer intrinsic into a single store or a load/store pair.
This may result in the generation of an unaligned atomic load/store, which the backend
will later transform into a libcall. That is not an evident performance gain, so it is better
to keep the intrinsic as is and handle it in the backend.
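
For illustration, a minimal IR case (mirroring the tests updated below): a 4-byte
unordered-atomic copy whose pointers are only 1-byte aligned.

  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1)

Before this change, InstCombine rewrote it into an unaligned atomic load/store pair
(%src.i32 and %dest.i32 stand in for the bitcast pointers):

  %v = load atomic i32, i32* %src.i32 unordered, align 1
  store atomic i32 %v, i32* %dest.i32 unordered, align 1

On most targets CodeGen can only lower such an underaligned atomic access through an
__atomic_* libcall, so the intrinsic is now kept whenever the alignment is smaller
than the total size.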

Reviewers: reames, anna, apilipenko, mkazantsev
Reviewed By: reames
Subscribers: t.p.northover, jfb, llvm-commits
Differential Revision: https://reviews.llvm.org/D56582

llvm-svn: 351295
Serguei Katkov 2019-01-16 04:36:26 +00:00
parent f67bc623d8
commit db0a0def25
2 changed files with 178 additions and 93 deletions

lib/Transforms/InstCombine/InstCombineCalls.cpp

@@ -136,6 +136,14 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
if (Size > 8 || (Size&(Size-1)))
return nullptr; // If not 1/2/4/8 bytes, exit.
// If this is an atomic transfer and the alignment is less than the size, we
// would introduce an unaligned memory access, which CodeGen later turns into
// a libcall. That is not an evident performance gain, so disable the
// transform for now.
if (isa<AtomicMemTransferInst>(MI))
if (CopyDstAlign < Size || CopySrcAlign < Size)
return nullptr;
// Use an integer load+store unless we can find something better.
unsigned SrcAddrSp =
cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
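
As a quick sketch of where the new bail-out draws the line (both cases are taken
from the tests updated below):

  ; still folded to an atomic load/store pair: alignment (8) >= size (8)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 8, i32 8)
  ; now left as the intrinsic: alignment (2) < size (8)
  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 8, i32 2)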
@@ -220,6 +228,18 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
Alignment = MI->getDestAlignment();
assert(Len && "0-sized memory setting should be removed already.");
// Alignment 0 is equivalent to alignment 1 for memset, but not for store.
if (Alignment == 0)
Alignment = 1;
// If this is an atomic memset and the alignment is less than the length, we
// would introduce an unaligned memory access, which CodeGen later turns into
// a libcall. That is not an evident performance gain, so disable the
// transform for now.
if (isa<AtomicMemSetInst>(MI))
if (Alignment < Len)
return nullptr;
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
@@ -229,9 +249,6 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
// Alignment 0 is equivalent to alignment 1 for memset, but not for store.
if (Alignment == 0) Alignment = 1;
// Extract the fill value and store.
uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
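
The memset path now follows the same rule; for example (matching the tests below,
with %dest.i32 standing in for the bitcast destination):

  ; still folded to a single store: alignment (4) >= length (4)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 4, i32 4)
  ;   => store atomic i32 16843009, i32* %dest.i32 unordered, align 4
  ; now left as the intrinsic: alignment (1) < length (2)
  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 2, i32 1)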

test/Transforms/InstCombine/element-atomic-memintrins.ll

@@ -15,12 +15,9 @@ define void @test_memset_zero_length(i8* %dest) {
define void @test_memset_to_store(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store(
; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 1
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 1
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 1
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 2, i32 1)
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 4, i32 1)
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 8, i32 1)
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 16, i32 1)
; CHECK-NEXT: ret void
;
@@ -34,15 +31,15 @@ define void @test_memset_to_store(i8* %dest) {
define void @test_memset_to_store_2(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store_2(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST:%.*]] to i16*
; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 2
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 2
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 2
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 [[DEST]], i8 1, i32 4, i32 2)
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 [[DEST]], i8 1, i32 8, i32 2)
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 [[DEST]], i8 1, i32 16, i32 2)
; CHECK-NEXT: ret void
;
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 1, i32 1)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 2, i32 2)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 4, i32 2)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 8, i32 2)
@@ -52,13 +49,17 @@ define void @test_memset_to_store_2(i8* %dest) {
define void @test_memset_to_store_4(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store_4(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST:%.*]] to i32*
; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP1]] unordered, align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP2]] unordered, align 4
; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 4
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 4
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 [[DEST]], i8 1, i32 8, i32 4)
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 [[DEST]], i8 1, i32 16, i32 4)
; CHECK-NEXT: ret void
;
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 1, i32 1)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 2, i32 2)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 4, i32 4)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 8, i32 4)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 16, i32 4)
@@ -67,11 +68,19 @@ define void @test_memset_to_store_4(i8* %dest) {
define void @test_memset_to_store_8(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store_8(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST:%.*]] to i64*
; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP1]] unordered, align 8
; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 8
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 8
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 8
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 8
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 [[DEST]], i8 1, i32 16, i32 8)
; CHECK-NEXT: ret void
;
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 1, i32 1)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 2, i32 2)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 4, i32 4)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 8, i32 8)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 16, i32 8)
ret void
@@ -79,9 +88,20 @@ define void @test_memset_to_store_8(i8* %dest) {
define void @test_memset_to_store_16(i8* %dest) {
; CHECK-LABEL: @test_memset_to_store_16(
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 [[DEST:%.*]], i8 1, i32 16, i32 16)
; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 16
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 16
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 16
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 16
; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 [[DEST]], i8 1, i32 16, i32 16)
; CHECK-NEXT: ret void
;
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 1, i32 1)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 2, i32 2)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 4, i32 4)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 8, i32 8)
call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 16, i32 16)
ret void
}
@@ -134,18 +154,9 @@ define void @test_memmove_loadstore(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore(
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 1
; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 1
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 1
; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 1
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 1
; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 1
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 1
; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 1
; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 2, i32 1)
; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 4, i32 1)
; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 8, i32 1)
; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 16, i32 1)
; CHECK-NEXT: ret void
;
@@ -159,21 +170,18 @@ define void @test_memmove_loadstore(i8* %dest, i8* %src) {
define void @test_memmove_loadstore_2(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore_2(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SRC:%.*]] to i16*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST:%.*]] to i16*
; CHECK-NEXT: [[TMP3:%.*]] = load atomic i16, i16* [[TMP1]] unordered, align 2
; CHECK-NEXT: store atomic i16 [[TMP3]], i16* [[TMP2]] unordered, align 2
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: [[TMP6:%.*]] = load atomic i32, i32* [[TMP4]] unordered, align 2
; CHECK-NEXT: store atomic i32 [[TMP6]], i32* [[TMP5]] unordered, align 2
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: [[TMP9:%.*]] = load atomic i64, i64* [[TMP7]] unordered, align 2
; CHECK-NEXT: store atomic i64 [[TMP9]], i64* [[TMP8]] unordered, align 2
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 2
; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 2
; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 2
; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 4, i32 2)
; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 8, i32 2)
; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 16, i32 2)
; CHECK-NEXT: ret void
;
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 1, i32 1)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 2, i32 2)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 4, i32 2)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 8, i32 2)
@@ -183,17 +191,22 @@ define void @test_memmove_loadstore_2(i8* %dest, i8* %src) {
define void @test_memmove_loadstore_4(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore_4(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SRC:%.*]] to i32*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST:%.*]] to i32*
; CHECK-NEXT: [[TMP3:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
; CHECK-NEXT: store atomic i32 [[TMP3]], i32* [[TMP2]] unordered, align 4
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: [[TMP6:%.*]] = load atomic i64, i64* [[TMP4]] unordered, align 4
; CHECK-NEXT: store atomic i64 [[TMP6]], i64* [[TMP5]] unordered, align 4
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 4
; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 4
; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 4
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 4
; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 4
; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 [[DEST]], i8* align 4 [[SRC]], i32 8, i32 4)
; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 [[DEST]], i8* align 4 [[SRC]], i32 16, i32 4)
; CHECK-NEXT: ret void
;
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 1, i32 1)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 2, i32 2)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 4, i32 4)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 8, i32 4)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 16, i32 4)
@@ -202,13 +215,26 @@ define void @test_memmove_loadstore_4(i8* %dest, i8* %src) {
define void @test_memmove_loadstore_8(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore_8(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SRC:%.*]] to i64*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST:%.*]] to i64*
; CHECK-NEXT: [[TMP3:%.*]] = load atomic i64, i64* [[TMP1]] unordered, align 8
; CHECK-NEXT: store atomic i64 [[TMP3]], i64* [[TMP2]] unordered, align 8
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 8
; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 8
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 8
; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 8
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 8
; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 8
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 8
; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 8
; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 [[DEST]], i8* align 8 [[SRC]], i32 16, i32 8)
; CHECK-NEXT: ret void
;
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 1, i32 1)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 2, i32 2)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 4, i32 4)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 8, i32 8)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 16, i32 8)
ret void
@@ -216,9 +242,27 @@ define void @test_memmove_loadstore_8(i8* %dest, i8* %src) {
define void @test_memmove_loadstore_16(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memmove_loadstore_16(
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 16
; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 16
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 16
; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 16
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 16
; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 16
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 16
; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 16
; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 [[DEST:%.*]], i8* align 16 [[SRC:%.*]], i32 16, i32 16)
; CHECK-NEXT: ret void
;
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 1, i32 1)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 2, i32 2)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 4, i32 4)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 8, i32 8)
call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 16, i32 16)
ret void
}
@@ -258,18 +302,9 @@ define void @test_memcpy_loadstore(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore(
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 1
; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 1
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 1
; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 1
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 1
; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 1
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 1
; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 1
; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 2, i32 1)
; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 4, i32 1)
; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 8, i32 1)
; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i32 16, i32 1)
; CHECK-NEXT: ret void
;
@@ -283,21 +318,18 @@ define void @test_memcpy_loadstore(i8* %dest, i8* %src) {
define void @test_memcpy_loadstore_2(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore_2(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SRC:%.*]] to i16*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST:%.*]] to i16*
; CHECK-NEXT: [[TMP3:%.*]] = load atomic i16, i16* [[TMP1]] unordered, align 2
; CHECK-NEXT: store atomic i16 [[TMP3]], i16* [[TMP2]] unordered, align 2
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: [[TMP6:%.*]] = load atomic i32, i32* [[TMP4]] unordered, align 2
; CHECK-NEXT: store atomic i32 [[TMP6]], i32* [[TMP5]] unordered, align 2
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: [[TMP9:%.*]] = load atomic i64, i64* [[TMP7]] unordered, align 2
; CHECK-NEXT: store atomic i64 [[TMP9]], i64* [[TMP8]] unordered, align 2
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 2
; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 2
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 2
; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 2
; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 4, i32 2)
; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 8, i32 2)
; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 [[DEST]], i8* align 2 [[SRC]], i32 16, i32 2)
; CHECK-NEXT: ret void
;
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 1, i32 1)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 2, i32 2)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 4, i32 2)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 8, i32 2)
@@ -307,17 +339,22 @@ define void @test_memcpy_loadstore_2(i8* %dest, i8* %src) {
define void @test_memcpy_loadstore_4(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore_4(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SRC:%.*]] to i32*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST:%.*]] to i32*
; CHECK-NEXT: [[TMP3:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
; CHECK-NEXT: store atomic i32 [[TMP3]], i32* [[TMP2]] unordered, align 4
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: [[TMP6:%.*]] = load atomic i64, i64* [[TMP4]] unordered, align 4
; CHECK-NEXT: store atomic i64 [[TMP6]], i64* [[TMP5]] unordered, align 4
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 4
; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 4
; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 4
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 4
; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 4
; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 [[DEST]], i8* align 4 [[SRC]], i32 8, i32 4)
; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 [[DEST]], i8* align 4 [[SRC]], i32 16, i32 4)
; CHECK-NEXT: ret void
;
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 1, i32 1)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 2, i32 2)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 4, i32 4)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 8, i32 4)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 16, i32 4)
@@ -326,13 +363,26 @@ define void @test_memcpy_loadstore_4(i8* %dest, i8* %src) {
define void @test_memcpy_loadstore_8(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore_8(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SRC:%.*]] to i64*
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST:%.*]] to i64*
; CHECK-NEXT: [[TMP3:%.*]] = load atomic i64, i64* [[TMP1]] unordered, align 8
; CHECK-NEXT: store atomic i64 [[TMP3]], i64* [[TMP2]] unordered, align 8
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 8
; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 8
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 8
; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 8
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 8
; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 8
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 8
; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 8
; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 [[DEST]], i8* align 8 [[SRC]], i32 16, i32 8)
; CHECK-NEXT: ret void
;
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 1, i32 1)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 2, i32 2)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 4, i32 4)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 8, i32 8)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 16, i32 8)
ret void
@@ -340,9 +390,27 @@ define void @test_memcpy_loadstore_8(i8* %dest, i8* %src) {
define void @test_memcpy_loadstore_16(i8* %dest, i8* %src) {
; CHECK-LABEL: @test_memcpy_loadstore_16(
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 16
; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 16
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 16
; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 16
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 16
; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 16
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64*
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64*
; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 16
; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 16
; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 [[DEST:%.*]], i8* align 16 [[SRC:%.*]], i32 16, i32 16)
; CHECK-NEXT: ret void
;
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 1, i32 1)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 2, i32 2)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 4, i32 4)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 8, i32 8)
call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 16, i32 16)
ret void
}