1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00
Geoff Berry e88f26718b Revert "[MachineCopyPropagation] Extend pass to do COPY source forwarding"
This reverts commit r311038.

Several buildbots are breaking, and at least one appears to be due to
the forwarding of physical regs enabled by this change.  Reverting while
I investigate further.

llvm-svn: 311062
2017-08-17 04:04:11 +00:00

251 lines
11 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}vgpr:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
%x = fadd float %arg3, 1.000000e+00
%a = insertvalue { float, float } undef, float %x, 0
%b = insertvalue { float, float } %a, float %arg3, 1
ret { float, float } %b
}
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: v_mov_b32_e32 v4, v0
; GCN: exp mrt0 v4, v4, v4, v4 done vm
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN-DAG: v_mov_b32_e32 v3, -1.0
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
}
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 562
; GCN-LABEL: {{^}}vgpr_ps_addr0:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v6
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
%i2 = extractelement <2 x i32> %arg7, i32 0
%i3 = extractelement <2 x i32> %arg8, i32 0
%f0 = bitcast i32 %i0 to float
%f1 = bitcast i32 %i1 to float
%f2 = bitcast i32 %i2 to float
%f3 = bitcast i32 %i3 to float
%r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
%r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
%r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
%r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
%r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
ret { float, float, float, float, float } %r4
}
; GCN: .long 165580
; GCN-NEXT: .long 1
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 1
; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
; GCN: v_mov_b32_e32 v0, 1.0
; GCN-NOT: s_endpgm
define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
ret float 1.000000e+00
}
; GCN: .long 165580
; GCN-NEXT: .long 2081
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 2081
; GCN-LABEL: {{^}}ps_input_ena_pos_w:
; GCN-DAG: v_mov_b32_e32 v0, v4
; GCN-DAG: v_mov_b32_e32 v1, v2
; GCN: v_mov_b32_e32 v2, v3
; GCN-NOT: s_endpgm
define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
bb:
%f = bitcast <2 x i32> %arg8 to <2 x float>
%s = insertvalue { float, <2 x float> } undef, float %arg14, 0
%s1 = insertvalue { float, <2 x float> } %s, <2 x float> %f, 1
ret { float, <2 x float> } %s1
}
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 563
; GCN-LABEL: {{^}}vgpr_ps_addr1:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v4
; GCN-DAG: v_mov_b32_e32 v3, v6
; GCN-DAG: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
%i2 = extractelement <2 x i32> %arg7, i32 0
%i3 = extractelement <2 x i32> %arg8, i32 0
%f0 = bitcast i32 %i0 to float
%f1 = bitcast i32 %i1 to float
%f2 = bitcast i32 %i2 to float
%f3 = bitcast i32 %i3 to float
%r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
%r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
%r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
%r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
%r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
ret { float, float, float, float, float } %r4
}
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 631
; GCN-LABEL: {{^}}vgpr_ps_addr119:
; GCN-DAG: v_mov_b32_e32 v0, v2
; GCN-DAG: v_mov_b32_e32 v1, v3
; GCN: v_mov_b32_e32 v2, v6
; GCN: v_mov_b32_e32 v3, v8
; GCN: v_mov_b32_e32 v4, v12
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
%i2 = extractelement <2 x i32> %arg7, i32 0
%i3 = extractelement <2 x i32> %arg8, i32 0
%f0 = bitcast i32 %i0 to float
%f1 = bitcast i32 %i1 to float
%f2 = bitcast i32 %i2 to float
%f3 = bitcast i32 %i3 to float
%r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
%r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
%r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
%r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
%r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
ret { float, float, float, float, float } %r4
}
; GCN: .long 165580
; GCN-NEXT: .long 562
; GCN-NEXT: .long 165584
; GCN-NEXT: .long 946
; GCN-LABEL: {{^}}vgpr_ps_addr418:
; GCN-NOT: v_mov_b32_e32 v0
; GCN-NOT: v_mov_b32_e32 v1
; GCN-NOT: v_mov_b32_e32 v2
; GCN: v_mov_b32_e32 v3, v4
; GCN: v_mov_b32_e32 v4, v8
; GCN-NOT: s_endpgm
define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
bb:
%i0 = extractelement <2 x i32> %arg4, i32 0
%i1 = extractelement <2 x i32> %arg4, i32 1
%i2 = extractelement <2 x i32> %arg7, i32 0
%i3 = extractelement <2 x i32> %arg8, i32 0
%f0 = bitcast i32 %i0 to float
%f1 = bitcast i32 %i1 to float
%f2 = bitcast i32 %i2 to float
%f3 = bitcast i32 %i3 to float
%r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
%r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
%r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
%r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
%r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
ret { float, float, float, float, float } %r4
}
; GCN-LABEL: {{^}}sgpr:
; GCN: s_add_i32 s0, s3, 2
; GCN: s_mov_b32 s2, s3
; GCN-NOT: s_endpgm
define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
%x = add i32 %arg2, 2
%a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
%b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
%c = insertvalue { i32, i32, i32 } %a, i32 %arg2, 2
ret { i32, i32, i32 } %c
}
; GCN-LABEL: {{^}}sgpr_literal:
; GCN: s_mov_b32 s0, 5
; GCN-NOT: s_mov_b32 s0, s0
; GCN-DAG: s_mov_b32 s1, 6
; GCN-DAG: s_mov_b32 s2, 7
; GCN-DAG: s_mov_b32 s3, 8
; GCN-NOT: s_endpgm
define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
%x = add i32 %arg2, 2
ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
}
; GCN-LABEL: {{^}}both:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
; GCN: s_mov_b32 s2, s3
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
%v = fadd float %arg3, 1.000000e+00
%s = add i32 %arg2, 2
%a0 = insertvalue { float, i32, float, i32, i32 } undef, float %v, 0
%a1 = insertvalue { float, i32, float, i32, i32 } %a0, i32 %s, 1
%a2 = insertvalue { float, i32, float, i32, i32 } %a1, float %arg3, 2
%a3 = insertvalue { float, i32, float, i32, i32 } %a2, i32 %arg1, 3
%a4 = insertvalue { float, i32, float, i32, i32 } %a3, i32 %arg2, 4
ret { float, i32, float, i32, i32 } %a4
}
; GCN-LABEL: {{^}}structure_literal:
; GCN: v_mov_b32_e32 v3, v0
; GCN: exp mrt0 v3, v3, v3, v3 done vm
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
; GCN-DAG: s_mov_b32 s1, 3
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
; GCN: s_waitcnt expcnt(0)
define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(2)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
bb:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
}
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
attributes #0 = { nounwind }
attributes #1 = { nounwind "InitialPSInputAddr"="0" }
attributes #2 = { nounwind "InitialPSInputAddr"="1" }
attributes #3 = { nounwind "InitialPSInputAddr"="119" }
attributes #4 = { nounwind "InitialPSInputAddr"="418" }