From 4266920cc1d75931cf096c0d90a7f68b003c6359 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Tue, 9 Jul 2019 09:53:36 +0000 Subject: [PATCH] Fixing @llvm.memcpy not honoring volatile. This is explicitly not addressing target-specific code, or calls to memcpy. Summary: https://bugs.llvm.org/show_bug.cgi?id=42254 Reviewers: courbet Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63215 llvm-svn: 365449 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 43 +++++----- ...ile-memstores-nooverlapping-load-stores.ll | 83 +++++++++++++++++++ 2 files changed, 107 insertions(+), 19 deletions(-) create mode 100644 test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 93a2fe9515d..5852e693fa9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5775,6 +5775,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memcpy of undef to nop. + // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) return Chain; @@ -5801,13 +5802,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); - if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), - (isZeroConstant ? 0 : SrcAlign), - false, false, CopyFromConstant, true, - DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), - MF.getFunction().getAttributes())) + if (!TLI.findOptimalMemOpLowering( + MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), + (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, + /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, + /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { @@ -5961,6 +5961,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memmove of undef to nop. + // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) return Chain; @@ -5981,13 +5982,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (Align > SrcAlign) SrcAlign = Align; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); - - if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), SrcAlign, - false, false, false, false, - DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), - MF.getFunction().getAttributes())) + // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in + // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the + // correct code. + bool AllowOverlap = false; + if (!TLI.findOptimalMemOpLowering( + MemOps, Limit, Size, (DstAlignCanChange ? 
0 : Align), SrcAlign, + /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { @@ -6066,6 +6069,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, uint64_t Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { // Turn a memset of undef to nop. + // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) return Chain; @@ -6082,11 +6086,12 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, DstAlignCanChange = true; bool IsZeroVal = isa(Src) && cast(Src)->isNullValue(); - if (!TLI.findOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), - Size, (DstAlignCanChange ? 0 : Align), 0, - true, IsZeroVal, false, true, - DstPtrInfo.getAddrSpace(), ~0u, - MF.getFunction().getAttributes())) + if (!TLI.findOptimalMemOpLowering( + MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, + (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true, + /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u, + MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { diff --git a/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll b/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll new file mode 100644 index 00000000000..aad332489fb --- /dev/null +++ b/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1 +define dso_local void @copy_7_bytes(i8* noalias nocapture, i8* noalias nocapture readonly) nounwind #0 { +; CHECK-LABEL: copy_7_bytes: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movl 3(%rsi), %ecx +; CHECK-NEXT: movl %ecx, 3(%rdi) +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 7, i1 false) + ret void +} +define dso_local void @copy_7_bytes_volatile(i8* noalias nocapture, i8* noalias nocapture readonly) nounwind #0 { +; CHECK-LABEL: copy_7_bytes_volatile: +; CHECK: # %bb.0: +; CHECK-NEXT: movb 6(%rsi), %al +; CHECK-NEXT: movb %al, 6(%rdi) +; CHECK-NEXT: movzwl 4(%rsi), %eax +; CHECK-NEXT: movw %ax, 4(%rdi) +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 7, i1 true) + ret void +} + + +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg) #1 +define dso_local void @move_7_bytes(i8* nocapture, i8* nocapture readonly) nounwind #0 { +; CHECK-LABEL: move_7_bytes: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movzwl 4(%rsi), %ecx +; CHECK-NEXT: movb 6(%rsi), %dl +; CHECK-NEXT: movb %dl, 6(%rdi) +; CHECK-NEXT: movw %cx, 4(%rdi) +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 7, i1 false) + ret void +} +define dso_local void @move_7_bytes_volatile(i8* nocapture, i8* nocapture readonly) nounwind #0 { +; CHECK-LABEL: move_7_bytes_volatile: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movzwl 4(%rsi), %ecx +; CHECK-NEXT: 
movb 6(%rsi), %dl +; CHECK-NEXT: movb %dl, 6(%rdi) +; CHECK-NEXT: movw %cx, 4(%rdi) +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 7, i1 true) + ret void +} + + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8 , i64, i1 immarg) #1 +define dso_local void @set_7_bytes(i8* noalias nocapture) nounwind #0 { +; CHECK-LABEL: set_7_bytes: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $16843009, 3(%rdi) # imm = 0x1010101 +; CHECK-NEXT: movl $16843009, (%rdi) # imm = 0x1010101 +; CHECK-NEXT: retq + tail call void @llvm.memset.p0i8.i64(i8* align 1 %0, i8 1, i64 7, i1 false) + ret void +} +define dso_local void @set_7_bytes_volatile(i8* noalias nocapture) nounwind #0 { +; CHECK-LABEL: set_7_bytes_volatile: +; CHECK: # %bb.0: +; CHECK-NEXT: movb $1, 6(%rdi) +; CHECK-NEXT: movw $257, 4(%rdi) # imm = 0x101 +; CHECK-NEXT: movl $16843009, (%rdi) # imm = 0x1010101 +; CHECK-NEXT: retq + tail call void @llvm.memset.p0i8.i64(i8* align 1 %0, i8 1, i64 7, i1 true) + ret void +} + +attributes #0 = { noreturn nounwind uwtable "target-cpu"="x86-64" } +attributes #1 = { argmemonly nounwind }
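
The effect of the AllowOverlap flag is visible in the new test: when overlap is permitted, the 7-byte copy is lowered as two 4-byte moves at offsets 0 and 3 (bytes 3..6 are written twice), whereas the volatile variants must decompose into non-overlapping 4 + 2 + 1 byte accesses so every byte is touched exactly once. The standalone C++ sketch below reproduces that size-selection decision for Size = 7; it is only an illustration of the idea, not the actual findOptimalMemOpLowering implementation, and the helper name pickMemOps as well as the fixed 4-byte maximum store width are assumptions made for the example.

// Minimal sketch (not LLVM code) of choosing store widths for a copy of
// Size bytes, with and without permission to emit overlapping accesses.
#include <cstdint>
#include <iostream>
#include <vector>

struct MemOp { uint64_t Offset; uint64_t Width; };

static std::vector<MemOp> pickMemOps(uint64_t Size, bool AllowOverlap) {
  std::vector<MemOp> Ops;
  uint64_t Offset = 0;
  uint64_t Width = 4; // assume the widest legal store is 4 bytes
  while (Offset < Size) {
    uint64_t Remaining = Size - Offset;
    if (Remaining >= Width) {
      // A full-width op fits without running past the end.
      Ops.push_back({Offset, Width});
      Offset += Width;
    } else if (AllowOverlap && Offset >= Width - Remaining) {
      // Slide one last wide op back so it ends exactly at Size. The bytes in
      // the overlapped region are written twice, which is acceptable for a
      // non-volatile copy but not for a volatile one.
      Ops.push_back({Size - Width, Width});
      Offset = Size;
    } else {
      // Fall back to the next narrower op and retry.
      Width /= 2;
    }
  }
  return Ops;
}

int main() {
  for (bool AllowOverlap : {true, false}) {
    std::cout << (AllowOverlap ? "overlap ok: " : "no overlap: ");
    for (const MemOp &Op : pickMemOps(7, AllowOverlap))
      std::cout << Op.Width << "B@" << Op.Offset << ' ';
    std::cout << '\n';
  }
}

Running the sketch prints "overlap ok: 4B@0 4B@3 " and "no overlap: 4B@0 2B@4 1B@6 ", which matches the movl/movl pair checked in copy_7_bytes and the movl/movw/movb sequence checked in copy_7_bytes_volatile above.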