Simon Atanasyan 9d10d98e8b [mips] Fix 64-bit address loading in case of applying 32-bit mask to the result
If the result of a 64-bit address load is combined with a 32-bit mask, LLVM
tries to optimize the code and remove the "redundant" load of the upper
32 bits of the address. This leads to incorrect code on MIPS64 targets.
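
For illustration, a minimal reproducer might look like the following
(hypothetical IR, not taken from the commit; `@foo` and the function name
are placeholders standing in for PR42736's actual test case):
```llvm
@foo = external global i32

define i64 @masked_address() {
entry:
  ; Take the full 64-bit address of a symbol, then apply a 32-bit mask.
  ; On N64 the combine dropped the %highest/%higher parts of the load.
  %addr = ptrtoint i32* @foo to i64
  %masked = and i64 %addr, 268435455 ; 0x0fffffff
  ret i64 %masked
}
```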

The MIPS backend creates the following chain of nodes to load a 64-bit
address in the `MipsTargetLowering::getAddrNonPICSym64` method:
```
(add (shl (add (shl (add %highest(sym), %higher(sym)),
                    16),
               %hi(sym)),
          16),
     %lo(sym))
```

If the mask is present, LLVM decides to optimize this chain of nodes. It
really does not make sense to load the upper 32 bits, because the
0x0fffffff mask clears them anyway. After removing the redundant nodes we
get this chain:
```
(add (shl %hi(sym), 16), %lo(sym))
```

There is no pattern that matches `(MipsHi (i64 symbol))`. Due to a bug in
the `SYM_32` predicate definition, the backend incorrectly selects a
pattern intended for 32-bit symbols and uses the `lui` instruction to load
`%hi(sym)`.

As a result we get an incorrect sequence of instructions with an
unnecessary 16-bit left shift:
```
lui     at,0x0
    R_MIPS_HI16     foo
dsll    at,at,0x10
daddiu  at,at,0
    R_MIPS_LO16     foo
```
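
With the fix applied, `%hi(sym)` is loaded by a non-shifting instruction
instead, so the explicit shift is no longer double-counted. The expected
shape of the output is roughly this (illustrative, reconstructed from the
description above rather than copied from the commit):
```
daddiu  at,zero,0
    R_MIPS_HI16     foo
dsll    at,at,0x10
daddiu  at,at,0
    R_MIPS_LO16     foo
```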

This patch resolves two problems:
- Fix the `SYM_32/SYM_64` predicates to prevent selection of patterns
  dedicated to 32-bit symbols when the N64 ABI is in use.
- Add the missing patterns for `%hi/%lo` of 64-bit symbols (a sketch of
  both changes follows this list).
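
As a rough sketch of both changes (simplified and hypothetical, assuming
the backend's existing `MipsPat` pattern class and `Predicate` machinery;
the actual patch covers more symbol kinds than `tglobaladdr`):
```
def SYM_32 : Predicate<"Subtarget->hasSym32()">;
def SYM_64 : Predicate<"!Subtarget->hasSym32()">;

// Select %hi/%lo of a 64-bit symbol through a non-shifting daddiu so
// the surrounding (shl ..., 16) nodes perform the shifting.
def : MipsPat<(MipsHi (i64 tglobaladdr:$in)),
              (DADDiu ZERO_64, tglobaladdr:$in)>, SYM_64;
def : MipsPat<(MipsLo (i64 tglobaladdr:$in)),
              (DADDiu ZERO_64, tglobaladdr:$in)>, SYM_64;
```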

Fix PR42736.

Differential Revision: https://reviews.llvm.org/D66228

llvm-svn: 370268
2019-08-28 22:32:10 +00:00


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=mips-unknown-linux-gnu -mcpu=mips32r2 \
; RUN: -mattr=+use-indirect-jump-hazard,+long-calls,+noabicalls %s -o - \
; RUN: -verify-machineinstrs | FileCheck -check-prefix=O32 %s
; RUN: llc -mtriple=mips64-unknown-linux-gnu -mcpu=mips64r2 -target-abi n32 \
; RUN: -mattr=+use-indirect-jump-hazard,+long-calls,+noabicalls %s -o - \
; RUN: -verify-machineinstrs | FileCheck -check-prefix=N32 %s
; RUN: llc -mtriple=mips64-unknown-linux-gnu -mcpu=mips64r2 -target-abi n64 \
; RUN: -mattr=+use-indirect-jump-hazard,+long-calls,+noabicalls %s -o - \
; RUN: -verify-machineinstrs | FileCheck -check-prefix=N64 %s

declare void @callee()
declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1)

@val = internal unnamed_addr global [20 x i32] zeroinitializer, align 4

; Test that the long call sequence uses the hazard barrier instruction variant.
define void @caller() {
; O32-LABEL: caller:
; O32: # %bb.0:
; O32-NEXT: addiu $sp, $sp, -24
; O32-NEXT: .cfi_def_cfa_offset 24
; O32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
; O32-NEXT: .cfi_offset 31, -4
; O32-NEXT: lui $1, %hi(callee)
; O32-NEXT: addiu $25, $1, %lo(callee)
; O32-NEXT: jalr.hb $25
; O32-NEXT: nop
; O32-NEXT: lui $1, %hi(val)
; O32-NEXT: addiu $4, $1, %lo(val)
; O32-NEXT: lui $1, %hi(memset)
; O32-NEXT: addiu $25, $1, %lo(memset)
; O32-NEXT: addiu $5, $zero, 0
; O32-NEXT: jalr.hb $25
; O32-NEXT: addiu $6, $zero, 80
; O32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
; O32-NEXT: jr $ra
; O32-NEXT: addiu $sp, $sp, 24
;
; N32-LABEL: caller:
; N32: # %bb.0:
; N32-NEXT: addiu $sp, $sp, -16
; N32-NEXT: .cfi_def_cfa_offset 16
; N32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
; N32-NEXT: .cfi_offset 31, -8
; N32-NEXT: lui $1, %hi(callee)
; N32-NEXT: addiu $25, $1, %lo(callee)
; N32-NEXT: jalr.hb $25
; N32-NEXT: nop
; N32-NEXT: lui $1, %hi(val)
; N32-NEXT: addiu $4, $1, %lo(val)
; N32-NEXT: lui $1, %hi(memset)
; N32-NEXT: addiu $25, $1, %lo(memset)
; N32-NEXT: daddiu $5, $zero, 0
; N32-NEXT: jalr.hb $25
; N32-NEXT: daddiu $6, $zero, 80
; N32-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
; N32-NEXT: jr $ra
; N32-NEXT: addiu $sp, $sp, 16
;
; N64-LABEL: caller:
; N64: # %bb.0:
; N64-NEXT: daddiu $sp, $sp, -16
; N64-NEXT: .cfi_def_cfa_offset 16
; N64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
; N64-NEXT: .cfi_offset 31, -8
; N64-NEXT: lui $1, %highest(callee)
; N64-NEXT: daddiu $1, $1, %higher(callee)
; N64-NEXT: dsll $1, $1, 16
; N64-NEXT: daddiu $1, $1, %hi(callee)
; N64-NEXT: dsll $1, $1, 16
; N64-NEXT: daddiu $25, $1, %lo(callee)
; N64-NEXT: jalr.hb $25
; N64-NEXT: nop
; N64-NEXT: lui $1, %highest(val)
; N64-NEXT: daddiu $1, $1, %higher(val)
; N64-NEXT: dsll $1, $1, 16
; N64-NEXT: daddiu $1, $1, %hi(val)
; N64-NEXT: dsll $1, $1, 16
; N64-NEXT: lui $2, %highest(memset)
; N64-NEXT: daddiu $4, $1, %lo(val)
; N64-NEXT: daddiu $1, $2, %higher(memset)
; N64-NEXT: dsll $1, $1, 16
; N64-NEXT: daddiu $1, $1, %hi(memset)
; N64-NEXT: dsll $1, $1, 16
; N64-NEXT: daddiu $25, $1, %lo(memset)
; N64-NEXT: daddiu $5, $zero, 0
; N64-NEXT: jalr.hb $25
; N64-NEXT: daddiu $6, $zero, 80
; N64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
; N64-NEXT: jr $ra
; N64-NEXT: daddiu $sp, $sp, 16
  call void @callee()
  call void @llvm.memset.p0i8.i32(i8* align 4 bitcast ([20 x i32]* @val to i8*), i8 0, i32 80, i1 false)
  ret void
}