1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[DAG] Fix Big Endian in Load-Store forwarding

Summary:
Correct offset calculation in load-store forwarding for big-endian
targets.

Reviewers: rnk, RKSimon, waltl

Subscribers: sdardis, nemanjai, hiraditya, jrtc27, atanasyan, jsji, llvm-commits

Differential Revision: https://reviews.llvm.org/D53147

llvm-svn: 344272
This commit is contained in:
Nirav Dave 2018-10-11 18:28:59 +00:00
parent 214b82aa2f
commit 56ac74189b
3 changed files with 68 additions and 23 deletions

View File

@ -12847,6 +12847,11 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (!STCoversLD)
return SDValue();
// Normalize for Endianness.
if (DAG.getDataLayout().isBigEndian())
Offset =
(STMemType.getSizeInBits() - LDMemType.getSizeInBits()) / 8 - Offset;
// Memory as copy space (potentially masked).
if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
// Simple case: Direct non-truncating forwarding

View File

@ -2045,29 +2045,29 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
; MIPS32R5EB-NEXT: jr $ra
; MIPS32R5EB-NEXT: nop
;
; MIPS64R5-LABEL: i32_2:
; MIPS64R5: # %bb.0:
; MIPS64R5-NEXT: daddiu $sp, $sp, -32
; MIPS64R5-NEXT: .cfi_def_cfa_offset 32
; MIPS64R5-NEXT: sd $5, 16($sp)
; MIPS64R5-NEXT: sd $4, 24($sp)
; MIPS64R5-NEXT: ldi.b $w0, 0
; MIPS64R5-NEXT: lw $1, 20($sp)
; MIPS64R5-NEXT: move.v $w1, $w0
; MIPS64R5-NEXT: insert.d $w1[0], $5
; MIPS64R5-NEXT: insert.d $w1[1], $1
; MIPS64R5-NEXT: insert.d $w0[0], $4
; MIPS64R5-NEXT: lw $1, 28($sp)
; MIPS64R5-NEXT: insert.d $w0[1], $1
; MIPS64R5-NEXT: addv.d $w0, $w0, $w1
; MIPS64R5-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5-NEXT: copy_s.d $2, $w0[1]
; MIPS64R5-NEXT: sw $2, 12($sp)
; MIPS64R5-NEXT: sw $1, 8($sp)
; MIPS64R5-NEXT: ld $2, 8($sp)
; MIPS64R5-NEXT: daddiu $sp, $sp, 32
; MIPS64R5-NEXT: jr $ra
; MIPS64R5-NEXT: nop
; MIPS64R5EB-LABEL: i32_2:
; MIPS64R5EB: # %bb.0:
; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32
; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32
; MIPS64R5EB-NEXT: sd $5, 16($sp)
; MIPS64R5EB-NEXT: sd $4, 24($sp)
; MIPS64R5EB-NEXT: ldi.b $w0, 0
; MIPS64R5EB-NEXT: lw $1, 16($sp)
; MIPS64R5EB-NEXT: move.v $w1, $w0
; MIPS64R5EB-NEXT: insert.d $w1[0], $1
; MIPS64R5EB-NEXT: insert.d $w1[1], $5
; MIPS64R5EB-NEXT: lw $1, 24($sp)
; MIPS64R5EB-NEXT: insert.d $w0[0], $1
; MIPS64R5EB-NEXT: insert.d $w0[1], $4
; MIPS64R5EB-NEXT: addv.d $w0, $w0, $w1
; MIPS64R5EB-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EB-NEXT: copy_s.d $2, $w0[1]
; MIPS64R5EB-NEXT: sw $2, 12($sp)
; MIPS64R5EB-NEXT: sw $1, 8($sp)
; MIPS64R5EB-NEXT: ld $2, 8($sp)
; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32
; MIPS64R5EB-NEXT: jr $ra
; MIPS64R5EB-NEXT: nop
;
; MIPS32R5EL-LABEL: i32_2:
; MIPS32R5EL: # %bb.0:
@ -2093,6 +2093,30 @@ define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
; MIPS32R5EL-NEXT: addiu $sp, $sp, 48
; MIPS32R5EL-NEXT: jr $ra
; MIPS32R5EL-NEXT: nop
;
; MIPS64R5EL-LABEL: i32_2:
; MIPS64R5EL: # %bb.0:
; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32
; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32
; MIPS64R5EL-NEXT: sd $5, 16($sp)
; MIPS64R5EL-NEXT: sd $4, 24($sp)
; MIPS64R5EL-NEXT: ldi.b $w0, 0
; MIPS64R5EL-NEXT: lw $1, 20($sp)
; MIPS64R5EL-NEXT: move.v $w1, $w0
; MIPS64R5EL-NEXT: insert.d $w1[0], $5
; MIPS64R5EL-NEXT: insert.d $w1[1], $1
; MIPS64R5EL-NEXT: insert.d $w0[0], $4
; MIPS64R5EL-NEXT: lw $1, 28($sp)
; MIPS64R5EL-NEXT: insert.d $w0[1], $1
; MIPS64R5EL-NEXT: addv.d $w0, $w0, $w1
; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0]
; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1]
; MIPS64R5EL-NEXT: sw $2, 12($sp)
; MIPS64R5EL-NEXT: sw $1, 8($sp)
; MIPS64R5EL-NEXT: ld $2, 8($sp)
; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32
; MIPS64R5EL-NEXT: jr $ra
; MIPS64R5EL-NEXT: nop
%1 = add <2 x i32> %a, %b
ret <2 x i32> %1
}

View File

@ -0,0 +1,16 @@
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
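; The load reads the high (most significant) byte of the 2-byte store; on this
; big-endian target that is the byte at the lowest address of the stored value.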
@g = global i8 -75
; Regression input: stores the 16-bit argument %v to a stack slot, reloads only
; the first byte of that slot through an i8 pointer, and writes that byte to @g.
; The expected assembly below is a halfword store followed by a byte load from
; the same stack offset.
define void @f(i16 %v) {
; CHECK-LABEL: f
; CHECK: sth 3, -2(1)
; CHECK: lbz 3, -2(1)
; Stack slot for the 2-byte value.
; NOTE(review): the names %p32 / %p16 do not match the pointee widths (i16 / i8).
%p32 = alloca i16
store i16 %v, i16* %p32
; Reinterpret the slot address as a byte pointer (offset 0 within the slot).
%p16 = bitcast i16* %p32 to i8*
; Narrow load that partially overlaps the preceding wider store.
%tmp = load i8, i8* %p16
; Publish the loaded byte through the global so the load stays observable.
store i8 %tmp, i8* @g
ret void
}