From 4266920cc1d75931cf096c0d90a7f68b003c6359 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Tue, 9 Jul 2019 09:53:36 +0000 Subject: [PATCH] Fixing @llvm.memcpy not honoring volatile. This is explicitly not addressing target-specific code, or calls to memcpy. Summary: https://bugs.llvm.org/show_bug.cgi?id=42254 Reviewers: courbet Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63215 llvm-svn: 365449 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 43 +++++----- ...ile-memstores-nooverlapping-load-stores.ll | 83 +++++++++++++++++++ 2 files changed, 107 insertions(+), 19 deletions(-) create mode 100644 test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 93a2fe9515d..5852e693fa9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5775,6 +5775,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memcpy of undef to nop. + // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) return Chain; @@ -5801,13 +5802,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); - if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), - (isZeroConstant ? 0 : SrcAlign), - false, false, CopyFromConstant, true, - DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), - MF.getFunction().getAttributes())) + if (!TLI.findOptimalMemOpLowering( + MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), + (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, + /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, + /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { @@ -5961,6 +5961,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { // Turn a memmove of undef to nop. + // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) return Chain; @@ -5981,13 +5982,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (Align > SrcAlign) SrcAlign = Align; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); - - if (!TLI.findOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), SrcAlign, - false, false, false, false, - DstPtrInfo.getAddrSpace(), - SrcPtrInfo.getAddrSpace(), - MF.getFunction().getAttributes())) + // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in + // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the + // correct code. + bool AllowOverlap = false; + if (!TLI.findOptimalMemOpLowering( + MemOps, Limit, Size, (DstAlignCanChange ? 
0 : Align), SrcAlign, + /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { @@ -6066,6 +6069,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, uint64_t Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { // Turn a memset of undef to nop. + // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) return Chain; @@ -6082,11 +6086,12 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, DstAlignCanChange = true; bool IsZeroVal = isa(Src) && cast(Src)->isNullValue(); - if (!TLI.findOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), - Size, (DstAlignCanChange ? 0 : Align), 0, - true, IsZeroVal, false, true, - DstPtrInfo.getAddrSpace(), ~0u, - MF.getFunction().getAttributes())) + if (!TLI.findOptimalMemOpLowering( + MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, + (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true, + /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u, + MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { diff --git a/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll b/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll new file mode 100644 index 00000000000..aad332489fb --- /dev/null +++ b/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1 +define dso_local void @copy_7_bytes(i8* noalias nocapture, i8* noalias nocapture readonly) nounwind #0 { +; CHECK-LABEL: copy_7_bytes: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movl 3(%rsi), %ecx +; CHECK-NEXT: movl %ecx, 3(%rdi) +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 7, i1 false) + ret void +} +define dso_local void @copy_7_bytes_volatile(i8* noalias nocapture, i8* noalias nocapture readonly) nounwind #0 { +; CHECK-LABEL: copy_7_bytes_volatile: +; CHECK: # %bb.0: +; CHECK-NEXT: movb 6(%rsi), %al +; CHECK-NEXT: movb %al, 6(%rdi) +; CHECK-NEXT: movzwl 4(%rsi), %eax +; CHECK-NEXT: movw %ax, 4(%rdi) +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 7, i1 true) + ret void +} + + +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg) #1 +define dso_local void @move_7_bytes(i8* nocapture, i8* nocapture readonly) nounwind #0 { +; CHECK-LABEL: move_7_bytes: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movzwl 4(%rsi), %ecx +; CHECK-NEXT: movb 6(%rsi), %dl +; CHECK-NEXT: movb %dl, 6(%rdi) +; CHECK-NEXT: movw %cx, 4(%rdi) +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 7, i1 false) + ret void +} +define dso_local void @move_7_bytes_volatile(i8* nocapture, i8* nocapture readonly) nounwind #0 { +; CHECK-LABEL: move_7_bytes_volatile: +; CHECK: # %bb.0: +; CHECK-NEXT: movl (%rsi), %eax +; CHECK-NEXT: movzwl 4(%rsi), %ecx +; CHECK-NEXT: 
movb 6(%rsi), %dl +; CHECK-NEXT: movb %dl, 6(%rdi) +; CHECK-NEXT: movw %cx, 4(%rdi) +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %0, i8* align 1 %1, i64 7, i1 true) + ret void +} + + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8 , i64, i1 immarg) #1 +define dso_local void @set_7_bytes(i8* noalias nocapture) nounwind #0 { +; CHECK-LABEL: set_7_bytes: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $16843009, 3(%rdi) # imm = 0x1010101 +; CHECK-NEXT: movl $16843009, (%rdi) # imm = 0x1010101 +; CHECK-NEXT: retq + tail call void @llvm.memset.p0i8.i64(i8* align 1 %0, i8 1, i64 7, i1 false) + ret void +} +define dso_local void @set_7_bytes_volatile(i8* noalias nocapture) nounwind #0 { +; CHECK-LABEL: set_7_bytes_volatile: +; CHECK: # %bb.0: +; CHECK-NEXT: movb $1, 6(%rdi) +; CHECK-NEXT: movw $257, 4(%rdi) # imm = 0x101 +; CHECK-NEXT: movl $16843009, (%rdi) # imm = 0x1010101 +; CHECK-NEXT: retq + tail call void @llvm.memset.p0i8.i64(i8* align 1 %0, i8 1, i64 7, i1 true) + ret void +} + +attributes #0 = { noreturn nounwind uwtable "target-cpu"="x86-64" } +attributes #1 = { argmemonly nounwind }
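
The effect of the AllowOverlap flag is visible in the new test: when overlap is permitted, the 7-byte copy is lowered as two 4-byte moves at offsets 0 and 3 (bytes 3..6 are written twice), whereas the volatile variants must decompose into non-overlapping 4 + 2 + 1 byte accesses so every byte is touched exactly once. The standalone C++ sketch below reproduces that size-selection decision for Size = 7; it is only an illustration of the idea, not the actual findOptimalMemOpLowering implementation, and the helper name pickMemOps as well as the fixed 4-byte maximum store width are assumptions made for the example.

// Minimal sketch (not LLVM code) of choosing store widths for a copy of
// Size bytes, with and without permission to emit overlapping accesses.
#include <cstdint>
#include <iostream>
#include <vector>

struct MemOp { uint64_t Offset; uint64_t Width; };

static std::vector<MemOp> pickMemOps(uint64_t Size, bool AllowOverlap) {
  std::vector<MemOp> Ops;
  uint64_t Offset = 0;
  uint64_t Width = 4; // assume the widest legal store is 4 bytes
  while (Offset < Size) {
    uint64_t Remaining = Size - Offset;
    if (Remaining >= Width) {
      // A full-width op fits without running past the end.
      Ops.push_back({Offset, Width});
      Offset += Width;
    } else if (AllowOverlap && Offset >= Width - Remaining) {
      // Slide one last wide op back so it ends exactly at Size. The bytes in
      // the overlapped region are written twice, which is acceptable for a
      // non-volatile copy but not for a volatile one.
      Ops.push_back({Size - Width, Width});
      Offset = Size;
    } else {
      // Fall back to the next narrower op and retry.
      Width /= 2;
    }
  }
  return Ops;
}

int main() {
  for (bool AllowOverlap : {true, false}) {
    std::cout << (AllowOverlap ? "overlap ok: " : "no overlap: ");
    for (const MemOp &Op : pickMemOps(7, AllowOverlap))
      std::cout << Op.Width << "B@" << Op.Offset << ' ';
    std::cout << '\n';
  }
}

Running the sketch prints "overlap ok: 4B@0 4B@3 " and "no overlap: 4B@0 2B@4 1B@6 ", which matches the movl/movl pair checked in copy_7_bytes and the movl/movw/movb sequence checked in copy_7_bytes_volatile above.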