mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
e5351a10fe
of loops.

Previously, two consecutive calls to function "func" would result in the
following sequence of instructions:

  1. load $16, %got(func)($gp)  // load address of lazy-binding stub.
  2. move $25, $16
  3. jalr $25                   // jump to lazy-binding stub.
  4. nop
  5. move $25, $16
  6. jalr $25                   // jump to lazy-binding stub again.

With this patch, the second call directly jumps to func's address, bypassing
the lazy-binding resolution routine:

  1. load $25, %got(func)($gp)  // load address of lazy-binding stub.
  2. jalr $25                   // jump to lazy-binding stub.
  3. nop
  4. load $25, %got(func)($gp)  // load resolved address of func.
  5. jalr $25                   // directly jump to func.

llvm-svn: 191591
37 lines
1.2 KiB
LLVM
37 lines
1.2 KiB
LLVM
; Code-generation test for MIPS: @f needs a stack frame larger than 64 KiB, and
; the FileCheck directives inside @f pin the instruction sequence that builds
; the large stack adjustment in a register. Two configurations are exercised:
; mipsel (check prefix "32") and mips64el with -mcpu=mips64 -mattr=n64 (check
; prefix "64").
; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | \
; RUN: FileCheck %s -check-prefix=64

; Aggregate wrapping a 65536-byte array; its size is what forces the large
; stack adjustment in @f.
%struct.S1 = type { [65536 x i8] }

; Source object for the copy performed in @f; defined outside this module.
@s1 = external global %struct.S1

; @f reserves a 65536-byte %struct.S1 temporary on its stack, fills it from @s1
; via llvm.memcpy, and passes it to @f2 as a byval argument. The FileCheck
; directives at the top of the entry block describe the expected prologue for
; each run configuration.
define void @f() nounwind {
entry:
; mipsel prologue: lui/addiu form (65535 << 16) - 24, i.e. -65560 as a 32-bit
; value, which is added to $sp; $ra is then stored at $sp + 65536 + 20.
; 32: lui $[[R0:[0-9]+]], 65535
; 32: addiu $[[R0]], $[[R0]], -24
; 32: addu $sp, $sp, $[[R0]]
; 32: lui $[[R1:[0-9]+]], 1
; 32: addu $[[R1]], $sp, $[[R1]]
; 32: sw $ra, 20($[[R1]])
; mips64el prologue: the daddiu/dsll steps form ((1 << 48) - 1) << 16 and then
; subtract 32, i.e. -65568 as a 64-bit value, which is added to $sp; $ra is
; then stored at $sp + 65536 + 24.
; 64: daddiu $[[R0:[0-9]+]], $zero, 1
; 64: dsll $[[R0]], $[[R0]], 48
; 64: daddiu $[[R0]], $[[R0]], -1
; 64: dsll $[[R0]], $[[R0]], 16
; 64: daddiu $[[R0]], $[[R0]], -32
; 64: daddu $sp, $sp, $[[R0]]
; 64: lui $[[R1:[0-9]+]], 1
; 64: daddu $[[R1]], $sp, $[[R1]]
; 64: sd $ra, 24($[[R1]])

  ; Stack temporary that will be handed to @f2.
  %agg.tmp = alloca %struct.S1, align 1
  ; i8* to the first byte of the temporary, as required by the memcpy intrinsic.
  %tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0
  ; Copy all 65536 bytes of @s1 into the temporary (alignment 1, non-volatile).
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.S1* @s1, i32 0, i32 0, i32 0), i32 65536, i32 1, i1 false)
  call void @f2(%struct.S1* byval %agg.tmp) nounwind
  ret void
}
; Callee of @f; takes its %struct.S1 argument byval. Defined outside this module.
declare void @f2(%struct.S1* byval)

; memcpy intrinsic (i8* destination, i8* source, i32 length) that @f uses to
; fill its stack temporary.
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind