1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-26 22:42:46 +02:00
llvm-mirror/test/CodeGen/PowerPC/load-two-flts.ll
Hal Finkel fb163aaea6 [PowerPC] Load two floats directly instead of using one 64-bit integer load
When dealing with complex<float>, and similar structures with two
single-precision floating-point numbers, especially when such things are being
passed around by value, we'll sometimes end up loading both float values by
extracting them from one 64-bit integer load. It looks like this:

  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
      t16: i64 = srl t13, Constant:i32<32>
    t17: i32 = truncate t16
  t18: f32 = bitcast t17
    t19: i32 = truncate t13
  t20: f32 = bitcast t19

The problem, especially before the P8 where those bitcasts aren't legal (and
get expanded via the stack), is that it would have been better to use two
floating-point loads directly. Here we add a target-specific DAGCombine to do
just that. In short, we turn:

	ld 3, 0(5)
	stw 3, -8(1)
	rldicl 3, 3, 32, 32
	stw 3, -4(1)
	lfs 3, -4(1)
	lfs 0, -8(1)

into:

        lfs 3, 4(5)
        lfs 0, 0(5)

llvm-svn: 264988
2016-03-31 02:56:05 +00:00

61 lines
2.0 KiB
LLVM

; RUN: llc < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
define void @_Z4testSt7complexIfE(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {
entry:
%v2 = load i64, i64* %ref.tmp, align 8
%v3 = lshr i64 %v2, 32
%v4 = trunc i64 %v3 to i32
%v5 = bitcast i32 %v4 to float
%v6 = trunc i64 %v2 to i32
%v7 = bitcast i32 %v6 to float
%mul_ad.i.i = fmul fast float %v5, %v1
%mul_bc.i.i = fmul fast float %v7, %v0
%mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i
%mul_ac.i.i = fmul fast float %v5, %v0
%mul_bd.i.i = fmul fast float %v7, %v1
%mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i
store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4
store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4
ret void
; CHECK-LABEL: @_Z4testSt7complexIfE
; CHECK-NOT: ld {{[0-9]+}}, 0(5)
; CHECK-NOT: stw
; CHECK-NOT: rldicl
; CHECK-DAG: lfs {{[0-9]+}}, 4(5)
; CHECK-DAG: lfs {{[0-9]+}}, 0(5)
; CHECK: blr
}
define i64* @_Z4testSt7complexIfE_idx(float %v0, float %v1, i64* %ref.tmp, float* %_M_value.realp.i.i, float* %_M_value.imagp.i.i) {
entry:
%r = getelementptr i64, i64* %ref.tmp, i64 1
%v2 = load i64, i64* %r, align 8
%v3 = lshr i64 %v2, 32
%v4 = trunc i64 %v3 to i32
%v5 = bitcast i32 %v4 to float
%v6 = trunc i64 %v2 to i32
%v7 = bitcast i32 %v6 to float
%mul_ad.i.i = fmul fast float %v5, %v1
%mul_bc.i.i = fmul fast float %v7, %v0
%mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i
%mul_ac.i.i = fmul fast float %v5, %v0
%mul_bd.i.i = fmul fast float %v7, %v1
%mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i
store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4
store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4
ret i64* %r
; CHECK-LABEL: @_Z4testSt7complexIfE
; CHECK-NOT: ld {{[0-9]+}}, 8(5)
; CHECK-NOT: ldu {{[0-9]+}}, 8(5)
; CHECK-NOT: stw
; CHECK-NOT: rldicl
; CHECK-DAG: lfsu {{[0-9]+}}, 8(5)
; CHECK-DAG: lfs {{[0-9]+}}, 4(5)
; CHECK: blr
}