From 6975bd40b380a9f647df2ebbcfb698849b0202f7 Mon Sep 17 00:00:00 2001 From: Daniel Neilson Date: Fri, 11 May 2018 20:04:50 +0000 Subject: [PATCH] [InstCombine] Handle atomic memset in the same way as regular memset Summary: This change adds handling of the atomic memset intrinsic to the code path that simplifies the regular memset. In practice this means that we will now also expand a small constant-length atomic memset into a single unordered atomic store. Reviewers: apilipenko, skatkov, mkazantsev, anna, reames Reviewed By: reames Subscribers: reames, llvm-commits Differential Revision: https://reviews.llvm.org/D46660 llvm-svn: 332132 --- .../InstCombine/InstCombineCalls.cpp | 8 ++- .../InstCombine/InstCombineInternal.h | 2 +- .../InstCombine/element-atomic-memintrins.ll | 66 +++++++++++++++++-- 3 files changed, 67 insertions(+), 9 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index e11d49251ad..82a26f348cd 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -199,7 +199,7 @@ Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) { return MI; } -Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { +Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) { unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); if (MI->getDestAlignment() < Alignment) { MI->setDestAlignment(Alignment); @@ -232,6 +232,8 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest, MI->isVolatile()); S->setAlignment(Alignment); + if (isa<AtomicMemSetInst>(MI)) + S->setOrdering(AtomicOrdering::Unordered); // Set the size of the copy to 0, it will be deleted on the next iteration. 
MI->setLength(Constant::getNullValue(LenC->getType())); @@ -1758,8 +1760,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) { if (Instruction *I = SimplifyAnyMemTransfer(MTI)) return I; - } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) { - if (Instruction *I = SimplifyMemSet(MSI)) + } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) { + if (Instruction *I = SimplifyAnyMemSet(MSI)) return I; } diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index b597957eeae..a3a485c2111 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -825,7 +825,7 @@ private: bool SimplifyStoreAtEndOfBlock(StoreInst &SI); Instruction *SimplifyAnyMemTransfer(AnyMemTransferInst *MI); - Instruction *SimplifyMemSet(MemSetInst *MI); + Instruction *SimplifyAnyMemSet(AnyMemSetInst *MI); Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); diff --git a/test/Transforms/InstCombine/element-atomic-memintrins.ll b/test/Transforms/InstCombine/element-atomic-memintrins.ll index 1962483a305..7467bc7f6c7 100644 --- a/test/Transforms/InstCombine/element-atomic-memintrins.ll +++ b/test/Transforms/InstCombine/element-atomic-memintrins.ll @@ -12,13 +12,15 @@ define void @test_memset_zero_length(i8* %dest) { ret void } -; Placeholder test. This will chance once support for lowering atomic memsets is added to instcombine. 
define void @test_memset_to_store(i8* %dest) { ; CHECK-LABEL: @test_memset_to_store( -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST:%.*]], i8 1, i32 1, i32 1) -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 2, i32 1) -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 4, i32 1) -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 8, i32 1) +; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16* +; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 1 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32* +; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 1 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64* +; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 1 ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 [[DEST]], i8 1, i32 16, i32 1) ; CHECK-NEXT: ret void ; @@ -30,6 +32,60 @@ define void @test_memset_to_store(i8* %dest) { ret void } +define void @test_memset_to_store_2(i8* %dest) { +; CHECK-LABEL: @test_memset_to_store_2( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST:%.*]] to i16* +; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 2 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32* +; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 2 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64* +; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 2 +; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 [[DEST]], i8 1, i32 16, i32 2) +; CHECK-NEXT: ret void +; + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 2, i32 2) + call void 
@llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 4, i32 2) + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 8, i32 2) + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 16, i32 2) + ret void +} + +define void @test_memset_to_store_4(i8* %dest) { +; CHECK-LABEL: @test_memset_to_store_4( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST:%.*]] to i32* +; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP1]] unordered, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i64* +; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP2]] unordered, align 4 +; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 [[DEST]], i8 1, i32 16, i32 4) +; CHECK-NEXT: ret void +; + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 4, i32 4) + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 8, i32 4) + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 16, i32 4) + ret void +} + +define void @test_memset_to_store_8(i8* %dest) { +; CHECK-LABEL: @test_memset_to_store_8( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST:%.*]] to i64* +; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP1]] unordered, align 8 +; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 [[DEST]], i8 1, i32 16, i32 8) +; CHECK-NEXT: ret void +; + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 8, i32 8) + call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 16, i32 8) + ret void +} + +define void @test_memset_to_store_16(i8* %dest) { +; CHECK-LABEL: @test_memset_to_store_16( +; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 [[DEST:%.*]], i8 1, i32 16, i32 16) +; CHECK-NEXT: ret void +; + call void 
@llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 16, i32 16) + ret void +} + declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture writeonly, i8, i32, i32) nounwind argmemonly