llvm-mirror/test/CodeGen/Mips/mips16_fpret.ll

; Test driver. Every invocation below runs llc with the same target options
; (mipsel-linux-gnu triple, -march=mipsel, -mcpu=mips16, static relocation
; model) and pipes the output to FileCheck. The invocations differ only in the
; check prefix, so each numbered prefix selects its own group of check lines
; in the function bodies of this file.
; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=1
; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=2
; RUN: llc -mtriple=mipsel-linux-gnu  -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=3
; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=4


; Global test inputs, one per return type exercised by this file.
; @x   - float, loaded and returned by @foox.
; @dx  - double, loaded and returned by @foodx.
; @cx  - { float, float } pair, read field by field in @foocx.
; @dcx - { double, double } pair, read field by field in @foodcx.
@x = global float 0x41F487E980000000, align 4
@dx = global double 0x41CDCC8BC4800000, align 8
@cx = global { float, float } { float 1.000000e+00, float 9.900000e+01 }, align 4
@dcx = global { double, double } { double 0x42CE5E14A412B480, double 0x423AA4C580DB0000 }, align 8

; foox - returns the float stored in @x.
; The checks for prefix 1 expect the function entry directive, a word load
; from %lo(x) into register $2, and a call to the helper __mips16_ret_sf.
define float @foox()  {
entry:
  %0 = load float* @x, align 4
  ret float %0
; 1: 	.ent	foox
; 1:	lw	$2, %lo(x)(${{[0-9]+}})
; 1:	jal	__mips16_ret_sf
}

; foodx - returns the double stored in @dx.
; Prefix 1 checks the word loaded from %lo(dx) into $2; prefix 2 checks the
; word loaded from offset 4 into $3. Both groups expect a call to the helper
; __mips16_ret_df.
define double @foodx()  {
entry:
  %0 = load double* @dx, align 8
  ret double %0
; 1: 	.ent	foodx
; 1: 	lw	$2, %lo(dx)(${{[0-9]+}})
; 1:	jal	__mips16_ret_df
; 2: 	.ent	foodx
; 2:	lw	$3, 4(${{[0-9]+}})
; 2:	jal	__mips16_ret_df

}

; foocx - returns a { float, float } pair copied from @cx.
; The two fields of @cx are loaded individually, stored into the matching
; fields of the local %retval slot, and the whole aggregate is then reloaded
; and returned.
; Prefix 1 checks the word loaded from %lo(cx) into $2; prefix 2 checks the
; word loaded from offset 4 into $3. Both groups expect a call to the helper
; __mips16_ret_sc.
define { float, float } @foocx()  {
entry:
  %retval = alloca { float, float }, align 4
  %cx.real = load float* getelementptr inbounds ({ float, float }* @cx, i32 0, i32 0)
  %cx.imag = load float* getelementptr inbounds ({ float, float }* @cx, i32 0, i32 1)
  %real = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
  %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
  store float %cx.real, float* %real
  store float %cx.imag, float* %imag
  %0 = load { float, float }* %retval
  ret { float, float } %0
; 1: 	.ent	foocx
; 1: 	lw	$2, %lo(cx)(${{[0-9]+}})
; 1:	jal	__mips16_ret_sc
; 2: 	.ent	foocx
; 2:	lw	$3, 4(${{[0-9]+}})
; 2:	jal	__mips16_ret_sc
}

; foodcx - returns a { double, double } pair copied from @dcx.
; The two fields of @dcx are loaded individually, stored into the matching
; fields of the local %retval slot, and the whole aggregate is then reloaded
; and returned.
; Each of the four prefixes checks one word load followed by a call to the
; helper __mips16_ret_dc. Prefixes 1 and 2 (the loads from %lo(dcx) and from
; offset 4) accept any single-digit destination register, while prefixes 3
; and 4 pin the destination to $4 (offset 8) and $5 (offset 12).
define { double, double } @foodcx()  {
entry:
  %retval = alloca { double, double }, align 8
  %dcx.real = load double* getelementptr inbounds ({ double, double }* @dcx, i32 0, i32 0)
  %dcx.imag = load double* getelementptr inbounds ({ double, double }* @dcx, i32 0, i32 1)
  %real = getelementptr inbounds { double, double }* %retval, i32 0, i32 0
  %imag = getelementptr inbounds { double, double }* %retval, i32 0, i32 1
  store double %dcx.real, double* %real
  store double %dcx.imag, double* %imag
  %0 = load { double, double }* %retval
  ret { double, double } %0
; 1: 	.ent	foodcx
; 1: 	lw	${{[0-9]}}, %lo(dcx)(${{[0-9]+}})
; 1:	jal	__mips16_ret_dc
; 2: 	.ent	foodcx
; 2:	lw	${{[0-9]}}, 4(${{[0-9]+}})
; 2:	jal	__mips16_ret_dc
; 3: 	.ent	foodcx
; 3:	lw	$4, 8(${{[0-9]+}})
; 3:	jal	__mips16_ret_dc
; 4: 	.ent	foodcx
; 4:	lw	$5, 12(${{[0-9]+}})
; 4:	jal	__mips16_ret_dc
}
[Mips] Adjust float ABI settings in case of MIPS16 mode. Hard float for mips16 means essentially to compile as soft float but to use a runtime library for soft float that is written with native mips32 floating point instructions (those runtime routines run in mips32 hard float mode). The patch reviewed by Reed Kotler. llvm-svn: 195123 2013-11-19 13:20:17 +01:00			`; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s \| FileCheck %s -check-prefix=1`
			`; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s \| FileCheck %s -check-prefix=2`
			`; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s \| FileCheck %s -check-prefix=3`
			`; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static < %s \| FileCheck %s -check-prefix=4`
Check-in of the first of several patches to finish implementation of mips16/mips32 floating point interoperability. This patch fixes returns from mips16 functions so that if the function was in fact called by a mips32 hard float routine, then values that would have been returned in floating point registers are so returned. Mips16 mode has no floating point instructions so there is no way to load values into floating point registers. This is needed when returning float, double, single complex, double complex in the Mips ABI. Helper functions in libc for mips16 are available to do this. For efficiency purposes, these helper functions have a different calling convention from normal Mips calls. Registers v0,v1,a0,a1 are used to pass parameters instead of a0,a1,a2,a3. This is because v0,v1,a0,a1 are the natural registers used to return floating point values in soft float. These values can then be moved to the appropriate floating point registers with no extra cost. The only register that is modified is ra in this call. The helper functions make sure that the return values are in the floating point registers that they would be in if soft float was not in effect (which it is for mips16, though the soft float is implemented using a mips32 library that uses hard float). llvm-svn: 181641 2013-05-11 00:25:39 +02:00

			`@x = global float 0x41F487E980000000, align 4`
			`@dx = global double 0x41CDCC8BC4800000, align 8`
			`@cx = global { float, float } { float 1.000000e+00, float 9.900000e+01 }, align 4`
			`@dcx = global { double, double } { double 0x42CE5E14A412B480, double 0x423AA4C580DB0000 }, align 8`

			`define float @foox() {`
			`entry:`
			`%0 = load float* @x, align 4`
			`ret float %0`
			`; 1: .ent foox`
			`; 1: lw $2, %lo(x)(${{[0-9]+}})`
			`; 1: jal __mips16_ret_sf`
			`}`

			`define double @foodx() {`
			`entry:`
			`%0 = load double* @dx, align 8`
			`ret double %0`
			`; 1: .ent foodx`
			`; 1: lw $2, %lo(dx)(${{[0-9]+}})`
			`; 1: jal __mips16_ret_df`
			`; 2: .ent foodx`
			`; 2: lw $3, 4(${{[0-9]+}})`
			`; 2: jal __mips16_ret_df`

			`}`

			`define { float, float } @foocx() {`
			`entry:`
			`%retval = alloca { float, float }, align 4`
			`%cx.real = load float* getelementptr inbounds ({ float, float }* @cx, i32 0, i32 0)`
			`%cx.imag = load float* getelementptr inbounds ({ float, float }* @cx, i32 0, i32 1)`
			`%real = getelementptr inbounds { float, float }* %retval, i32 0, i32 0`
			`%imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1`
			`store float %cx.real, float* %real`
			`store float %cx.imag, float* %imag`
			`%0 = load { float, float }* %retval`
			`ret { float, float } %0`
			`; 1: .ent foocx`
			`; 1: lw $2, %lo(cx)(${{[0-9]+}})`
			`; 1: jal __mips16_ret_sc`
			`; 2: .ent foocx`
			`; 2: lw $3, 4(${{[0-9]+}})`
			`; 2: jal __mips16_ret_sc`
			`}`

			`define { double, double } @foodcx() {`
			`entry:`
			`%retval = alloca { double, double }, align 8`
			`%dcx.real = load double* getelementptr inbounds ({ double, double }* @dcx, i32 0, i32 0)`
			`%dcx.imag = load double* getelementptr inbounds ({ double, double }* @dcx, i32 0, i32 1)`
			`%real = getelementptr inbounds { double, double }* %retval, i32 0, i32 0`
			`%imag = getelementptr inbounds { double, double }* %retval, i32 0, i32 1`
			`store double %dcx.real, double* %real`
			`store double %dcx.imag, double* %imag`
			`%0 = load { double, double }* %retval`
			`ret { double, double } %0`
			`; 1: .ent foodcx`
Allocate local registers in order for optimal coloring. Also avoid locals evicting locals just because they want a cheaper register. Problem: MI Sched knows exactly how many registers we have and assumes they can be colored. In cases where we have large blocks, usually from unrolled loops, greedy coloring fails. This is a source of "regressions" from the MI Scheduler on x86. I noticed this issue on x86 where we have long chains of two-address defs in the same live range. It's easy to see this in matrix multiplication benchmarks like IRSmk and even the unit test misched-matmul.ll. A fundamental difference between the LLVM register allocator and conventional graph coloring is that in our model a live range can't discover its neighbors, it can only verify its neighbors. That's why we initially went for greedy coloring and added eviction to deal with the hard cases. However, for singly defined and two-address live ranges, we can optimally color without visiting neighbors simply by processing the live ranges in instruction order. Other beneficial side effects: It is much easier to understand and debug regalloc for large blocks when the live ranges are allocated in order. Yes, global allocation is still very confusing, but it's nice to be able to comprehend what happened locally. Heuristics could be added to bias register assignment based on instruction locality (think late register pairing, banks...). Intuitively this will make some test cases that are on the threshold of register pressure more stable. llvm-svn: 187139 2013-07-25 20:35:14 +02:00			`; 1: lw ${{[0-9]}}, %lo(dcx)(${{[0-9]+}})`
Checkin in of first of several patches to finish implementation of mips16/mips32 floating point interoperability. This patch fixes returns from mips16 functions so that if the function was in fact called by a mips32 hard float routine, then values that would have been returned in floating point registers are so returned. Mips16 mode has no floating point instructions so there is no way to load values into floating point registers. This is needed when returning float, double, single complex, double complex in the Mips ABI. Helper functions in libc for mips16 are available to do this. For efficiency purposes, these helper functions have a different calling convention from normal Mips calls. Registers v0,v1,a0,a1 are used to pass parameters instead of a0,a1,a2,a3. This is because v0,v1,a0,a1 are the natural registers used to return floating point values in soft float. These values can then be moved to the appropriate floating point registers with no extra cost. The only register that is modified is ra in this call. The helper functions make sure that the return values are in the floating point registers that they would be in if soft float was not in effect (which it is for mips16, though the soft float is implemented using a mips32 library that uses hard float). llvm-svn: 181641 2013-05-11 00:25:39 +02:00			`; 1: jal __mips16_ret_dc`
			`; 2: .ent foodcx`
Allocate local registers in order for optimal coloring. Also avoid locals evicting locals just because they want a cheaper register. Problem: MI Sched knows exactly how many registers we have and assumes they can be colored. In cases where we have large blocks, usually from unrolled loops, greedy coloring fails. This is a source of "regressions" from the MI Scheduler on x86. I noticed this issue on x86 where we have long chains of two-address defs in the same live range. It's easy to see this in matrix multiplication benchmarks like IRSmk and even the unit test misched-matmul.ll. A fundamental difference between the LLVM register allocator and conventional graph coloring is that in our model a live range can't discover its neighbors, it can only verify its neighbors. That's why we initially went for greedy coloring and added eviction to deal with the hard cases. However, for singly defined and two-address live ranges, we can optimally color without visiting neighbors simply by processing the live ranges in instruction order. Other beneficial side effects: It is much easier to understand and debug regalloc for large blocks when the live ranges are allocated in order. Yes, global allocation is still very confusing, but it's nice to be able to comprehend what happened locally. Heuristics could be added to bias register assignment based on instruction locality (think late register pairing, banks...). Intuituvely this will make some test cases that are on the threshold of register pressure more stable. llvm-svn: 187139 2013-07-25 20:35:14 +02:00			`; 2: lw ${{[0-9]}}, 4(${{[0-9]+}})`
Checkin in of first of several patches to finish implementation of mips16/mips32 floating point interoperability. This patch fixes returns from mips16 functions so that if the function was in fact called by a mips32 hard float routine, then values that would have been returned in floating point registers are so returned. Mips16 mode has no floating point instructions so there is no way to load values into floating point registers. This is needed when returning float, double, single complex, double complex in the Mips ABI. Helper functions in libc for mips16 are available to do this. For efficiency purposes, these helper functions have a different calling convention from normal Mips calls. Registers v0,v1,a0,a1 are used to pass parameters instead of a0,a1,a2,a3. This is because v0,v1,a0,a1 are the natural registers used to return floating point values in soft float. These values can then be moved to the appropriate floating point registers with no extra cost. The only register that is modified is ra in this call. The helper functions make sure that the return values are in the floating point registers that they would be in if soft float was not in effect (which it is for mips16, though the soft float is implemented using a mips32 library that uses hard float). llvm-svn: 181641 2013-05-11 00:25:39 +02:00			`; 2: jal __mips16_ret_dc`
			`; 3: .ent foodcx`
			`; 3: lw $4, 8(${{[0-9]+}})`
			`; 3: jal __mips16_ret_dc`
			`; 4: .ent foodcx`
			`; 4: lw $5, 12(${{[0-9]+}})`
			`; 4: jal __mips16_ret_dc`
			`}`