mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 12:12:47 +01:00
Don't narrow the load and store in a load+twiddle+store sequence unless
there are clearly no stores between the load and the store. This fixes this miscompile reported as PR7833. This breaks the test/CodeGen/X86/narrow_op-2.ll optimization, which is safe, but awkward to prove safe. Move it to X86's README.txt. llvm-svn: 112861
This commit is contained in:
parent
3080c6e860
commit
6824bfc554
@ -5798,7 +5798,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
|
||||
return SDValue();
|
||||
|
||||
SDValue N0 = Value.getOperand(0);
|
||||
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
|
||||
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
|
||||
Chain == SDValue(N0.getNode(), 1)) {
|
||||
LoadSDNode *LD = cast<LoadSDNode>(N0);
|
||||
if (LD->getBasePtr() != Ptr)
|
||||
return SDValue();
|
||||
|
@ -1915,3 +1915,48 @@ And the following x86 code:
|
||||
It should be possible to eliminate the sign extensions.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
LLVM misses a load+store narrowing opportunity in this code:
|
||||
|
||||
%struct.bf = type { i64, i16, i16, i32 }
|
||||
|
||||
@bfi = external global %struct.bf* ; <%struct.bf**> [#uses=2]
|
||||
|
||||
define void @t1() nounwind ssp {
|
||||
entry:
|
||||
%0 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1]
|
||||
%1 = getelementptr %struct.bf* %0, i64 0, i32 1 ; <i16*> [#uses=1]
|
||||
%2 = bitcast i16* %1 to i32* ; <i32*> [#uses=2]
|
||||
%3 = load i32* %2, align 1 ; <i32> [#uses=1]
|
||||
%4 = and i32 %3, -65537 ; <i32> [#uses=1]
|
||||
store i32 %4, i32* %2, align 1
|
||||
%5 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1]
|
||||
%6 = getelementptr %struct.bf* %5, i64 0, i32 1 ; <i16*> [#uses=1]
|
||||
%7 = bitcast i16* %6 to i32* ; <i32*> [#uses=2]
|
||||
%8 = load i32* %7, align 1 ; <i32> [#uses=1]
|
||||
%9 = and i32 %8, -131073 ; <i32> [#uses=1]
|
||||
store i32 %9, i32* %7, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
LLVM currently emits this:
|
||||
|
||||
movq bfi(%rip), %rax
|
||||
andl $-65537, 8(%rax)
|
||||
movq bfi(%rip), %rax
|
||||
andl $-131073, 8(%rax)
|
||||
ret
|
||||
|
||||
It could narrow the loads and stores to emit this:
|
||||
|
||||
movq bfi(%rip), %rax
|
||||
andb $-2, 10(%rax)
|
||||
movq bfi(%rip), %rax
|
||||
andb $-3, 10(%rax)
|
||||
ret
|
||||
|
||||
The trouble is that there is a TokenFactor between the store and the
|
||||
load, making it non-trivial to determine if there's anything between
|
||||
the load and the store which would prohibit narrowing.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
@ -1,25 +0,0 @@
|
||||
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
||||
|
||||
%struct.bf = type { i64, i16, i16, i32 }
|
||||
@bfi = external global %struct.bf*
|
||||
|
||||
define void @t1() nounwind ssp {
|
||||
entry:
|
||||
|
||||
; CHECK: andb $-2, 10(
|
||||
; CHECK: andb $-3, 10(
|
||||
|
||||
%0 = load %struct.bf** @bfi, align 8
|
||||
%1 = getelementptr %struct.bf* %0, i64 0, i32 1
|
||||
%2 = bitcast i16* %1 to i32*
|
||||
%3 = load i32* %2, align 1
|
||||
%4 = and i32 %3, -65537
|
||||
store i32 %4, i32* %2, align 1
|
||||
%5 = load %struct.bf** @bfi, align 8
|
||||
%6 = getelementptr %struct.bf* %5, i64 0, i32 1
|
||||
%7 = bitcast i16* %6 to i32*
|
||||
%8 = load i32* %7, align 1
|
||||
%9 = and i32 %8, -131073
|
||||
store i32 %9, i32* %7, align 1
|
||||
ret void
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
; rdar://7860110
|
||||
; RUN: llc < %s | FileCheck %s -check-prefix=X64
|
||||
; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=X32
|
||||
; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=X64
|
||||
; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s -check-prefix=X32
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target triple = "x86_64-apple-darwin10.2"
|
||||
|
||||
@ -125,3 +125,30 @@ entry:
|
||||
; X32: movb %cl, 5(%{{.*}})
|
||||
}
|
||||
|
||||
; PR7833
|
||||
|
||||
@g_16 = internal global i32 -1
|
||||
|
||||
; X64: test8:
|
||||
; X64-NEXT: movl _g_16(%rip), %eax
|
||||
; X64-NEXT: movl $0, _g_16(%rip)
|
||||
; X64-NEXT: orl $1, %eax
|
||||
; X64-NEXT: movl %eax, _g_16(%rip)
|
||||
; X64-NEXT: ret
|
||||
define void @test8() nounwind {
|
||||
%tmp = load i32* @g_16
|
||||
store i32 0, i32* @g_16
|
||||
%or = or i32 %tmp, 1
|
||||
store i32 %or, i32* @g_16
|
||||
ret void
|
||||
}
|
||||
|
||||
; X64: test9:
|
||||
; X64-NEXT: orb $1, _g_16(%rip)
|
||||
; X64-NEXT: ret
|
||||
define void @test9() nounwind {
|
||||
%tmp = load i32* @g_16
|
||||
%or = or i32 %tmp, 1
|
||||
store i32 %or, i32* @g_16
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user