# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
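# Check that the machine scheduler schedules this block, which mixes a
# DBG_VALUE into the real instructions, without crashing, and that the
# DBG_VALUE and its DIExpression survive scheduling (see the CHECK lines
# in the MIR document below).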

--- |
  %struct.widget.0 = type { float, i32, i32 }
  %struct.baz = type { <4 x float>, <4 x float>, <2 x float>, i32, i32 }
  %struct.snork = type { float, float, float, i32, float, float, float, float, %struct.spam }
  %struct.spam = type { %struct.zot, [16 x i8] }
  %struct.zot = type { float, float, float, float, <4 x float> }
  %struct.wombat = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, [2 x i16], [2 x i16] }
  %struct.wombat.1 = type { [4 x i32], [4 x i32], [4 x i32], [4 x i32], i32, i32, i32, i32 }

  @sched_dbg_value_crash.tmp6 = internal unnamed_addr addrspace(3) global [256 x [16 x i8]] undef, align 16

  define amdgpu_kernel void @sched_dbg_value_crash(i8 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture readonly %arg1, %struct.widget.0 addrspace(1)* nocapture readonly %arg2, %struct.baz addrspace(1)* nocapture readonly %arg3, %struct.snork addrspace(1)* nocapture %arg4) local_unnamed_addr #2 {
  bb:
    %0 = getelementptr i32, i32 addrspace(1)* %arg1, i64 0, !amdgpu.uniform !3, !amdgpu.noclobber !3
    %tmp5 = alloca %struct.wombat, align 16, addrspace(5)
    %1 = call noalias nonnull dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
    %2 = bitcast i8 addrspace(4)* %1 to i32 addrspace(4)*
    %3 = getelementptr inbounds i32, i32 addrspace(4)* %2, i64 1
    %4 = bitcast i32 addrspace(4)* %3 to <2 x i32> addrspace(4)*, !amdgpu.uniform !3, !amdgpu.noclobber !3
    %5 = load <2 x i32>, <2 x i32> addrspace(4)* %4, align 4, !invariant.load !3
    %6 = extractelement <2 x i32> %5, i32 0
    %7 = extractelement <2 x i32> %5, i32 1
    %8 = lshr i32 %6, 16
    %9 = call i32 @llvm.amdgcn.workitem.id.x(), !range !4
    %10 = call i32 @llvm.amdgcn.workitem.id.y(), !range !4
    %11 = call i32 @llvm.amdgcn.workitem.id.z(), !range !4
    %12 = mul nuw nsw i32 %8, %7
    %13 = mul i32 %12, %9
    %14 = mul nuw nsw i32 %10, %7
    %15 = add i32 %13, %14
    %16 = add i32 %15, %11
    %17 = getelementptr inbounds [256 x [16 x i8]], [256 x [16 x i8]] addrspace(3)* @sched_dbg_value_crash.tmp6, i32 0, i32 %16
    %tmp7 = load i64, i64 addrspace(4)* null, align 536870912
    %tmp8 = tail call i32 @llvm.amdgcn.workitem.id.x() #3, !range !4
    %tmp9 = zext i32 %tmp8 to i64
    %tmp10 = add i64 %tmp7, %tmp9
    %tmp11 = shl i64 %tmp10, 32
    %tmp12 = ashr exact i64 %tmp11, 32
    %tmp13 = getelementptr inbounds %struct.widget.0, %struct.widget.0 addrspace(1)* %arg2, i64 %tmp12, i32 1
    %tmp14 = load i32, i32 addrspace(1)* %tmp13, align 4
    %tmp15 = getelementptr inbounds %struct.baz, %struct.baz addrspace(1)* %arg3, i64 %tmp12, i32 1
    %tmp16 = load <4 x float>, <4 x float> addrspace(1)* %tmp15, align 16
    %tmp17 = sext i32 %tmp14 to i64
    %tmp18 = load i32, i32 addrspace(1)* %0, align 4
    %tmp19 = zext i32 %tmp18 to i64
    %tmp20 = shl nuw nsw i64 %tmp19, 2
    %tmp21 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 %tmp20
    %tmp22 = bitcast i8 addrspace(1)* %tmp21 to %struct.wombat.1 addrspace(1)*
    %tmp23 = bitcast %struct.wombat addrspace(5)* %tmp5 to i8 addrspace(5)*
    call void @llvm.lifetime.start.p5i8(i64 144, i8 addrspace(5)* nonnull %tmp23) #3
    %tmp24 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(5)* %tmp5, i32 0, i32 6
    %tmp25 = getelementptr i32, i32 addrspace(1)* %arg1, i64 3, !amdgpu.uniform !3, !amdgpu.noclobber !3
    %tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4
    %tmp27 = zext i32 %tmp26 to i64
    %tmp28 = shl nuw nsw i64 %tmp27, 2
    %tmp29 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 %tmp28
    %tmp30 = bitcast i8 addrspace(1)* %tmp29 to <2 x float> addrspace(1)*
    %tmp31 = getelementptr inbounds %struct.wombat.1, %struct.wombat.1 addrspace(1)* %tmp22, i64 %tmp17, i32 2, i64 0
    %18 = bitcast i32 addrspace(1)* %tmp31 to <3 x i32> addrspace(1)*
    %19 = load <3 x i32>, <3 x i32> addrspace(1)* %18, align 4
    %tmp325 = extractelement <3 x i32> %19, i32 0
    %tmp386 = extractelement <3 x i32> %19, i32 1
    %tmp447 = extractelement <3 x i32> %19, i32 2
    %tmp33 = sext i32 %tmp325 to i64
    %tmp34 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %tmp30, i64 %tmp33
    %tmp35 = load <2 x float>, <2 x float> addrspace(1)* %tmp34, align 8
    %tmp36 = extractelement <2 x float> %tmp35, i32 1
    %tmp39 = sext i32 %tmp386 to i64
    %tmp40 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %tmp30, i64 %tmp39
    %tmp41 = load <2 x float>, <2 x float> addrspace(1)* %tmp40, align 8
    %tmp42 = extractelement <2 x float> %tmp41, i32 1
    %tmp45 = sext i32 %tmp447 to i64
    %tmp46 = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %tmp30, i64 %tmp45
    %tmp47 = load <2 x float>, <2 x float> addrspace(1)* %tmp46, align 8
    %tmp48 = extractelement <2 x float> %tmp47, i32 1
    %tmp49 = getelementptr i32, i32 addrspace(1)* %arg1, i64 1, !amdgpu.uniform !3, !amdgpu.noclobber !3
    %tmp50 = load i32, i32 addrspace(1)* %tmp49, align 4
    %tmp51 = zext i32 %tmp50 to i64
    %tmp52 = shl nuw nsw i64 %tmp51, 2
    %tmp53 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 %tmp52
    %tmp54 = bitcast i8 addrspace(1)* %tmp53 to <4 x float> addrspace(1)*
    %tmp55 = getelementptr inbounds %struct.wombat.1, %struct.wombat.1 addrspace(1)* %tmp22, i64 %tmp17, i32 0, i64 0
    %20 = bitcast i32 addrspace(1)* %tmp55 to <2 x i32> addrspace(1)*
    %21 = load <2 x i32>, <2 x i32> addrspace(1)* %20, align 4
    %tmp568 = extractelement <2 x i32> %21, i32 0
    %tmp639 = extractelement <2 x i32> %21, i32 1
    %tmp57 = sext i32 %tmp568 to i64
    %tmp58 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %tmp54, i64 %tmp57
    %tmp59 = load <4 x float>, <4 x float> addrspace(1)* %tmp58, align 16
    %tmp60 = extractelement <4 x float> %tmp59, i32 0
    %tmp61 = extractelement <4 x float> %tmp59, i32 1
    %tmp64 = sext i32 %tmp639 to i64
    %tmp65 = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %tmp54, i64 %tmp64
    %tmp66 = load <4 x float>, <4 x float> addrspace(1)* %tmp65, align 16
    %tmp67 = extractelement <4 x float> %tmp16, i64 0
    %tmp69 = fsub fast float -0.000000e+00, %tmp67
    %tmp70 = fmul float %tmp67, 0.000000e+00
    %tmp = fmul fast float %tmp67, undef
    %tmp71 = fsub fast float %tmp, %tmp70
    %tmp73 = fadd fast float %tmp, undef
    %tmp74 = insertelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, float %tmp69, i32 0
    %tmp75 = insertelement <4 x float> %tmp74, float %tmp71, i32 1
    %tmp76 = insertelement <4 x float> %tmp75, float %tmp73, i32 2
    store <4 x float> %tmp76, <4 x float> addrspace(5)* %tmp24, align 16
    %tmp77 = fsub float undef, %tmp60
    %tmp78 = fsub float undef, %tmp61
    %tmp79 = extractelement <4 x float> %tmp66, i32 2
    %tmp80 = extractelement <4 x float> %tmp59, i32 2
    %tmp81 = fsub float %tmp79, %tmp80
    %tmp82 = fmul fast float %tmp81, undef
    %tmp83 = fmul fast float %tmp78, undef
    %tmp84 = fadd fast float %tmp83, %tmp77
    %tmp85 = fadd fast float %tmp84, undef
    %tmp86 = fmul float %tmp82, %tmp82
    %tmp87 = fdiv float 1.000000e+00, %tmp86
    tail call void @llvm.dbg.value(metadata float %tmp87, metadata !5, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #3, !dbg !8
    %tmp88 = fmul float %tmp82, 0.000000e+00
    %tmp89 = fsub fast float %tmp85, %tmp88
    %tmp90 = fdiv float %tmp89, %tmp86
    %tmp91 = fsub float 1.000000e+00, %tmp87
    %tmp92 = fsub float %tmp91, %tmp90
    %tmp93 = fmul float %tmp42, %tmp87
    %tmp94 = call float @llvm.fmuladd.f32(float %tmp92, float %tmp36, float %tmp93)
    %tmp95 = call float @llvm.fmuladd.f32(float %tmp48, float undef, float %tmp94)
    %tmp96 = fsub float extractelement (<2 x float> fadd (<2 x float> fmul (<2 x float> undef, <2 x float> undef), <2 x float> undef), i64 1), %tmp95
    %tmp97 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(5)* %tmp5, i32 0, i32 8, i32 1
    call void @func(float %tmp96, i64 0, i16 addrspace(5)* nonnull %tmp97) #3
    %tmp984 = bitcast [16 x i8] addrspace(3)* %17 to i8 addrspace(3)*
    %tmp99 = getelementptr inbounds %struct.snork, %struct.snork addrspace(1)* %arg4, i64 %tmp12, i32 8, i32 1, i64 0
    call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %tmp99, i8 addrspace(3)* %tmp984, i64 16, i32 16, i1 false)
    call void @llvm.lifetime.end.p5i8(i64 144, i8 addrspace(5)* nonnull %tmp23) #3
    ret void
  }

  declare void @func(float, i64, i16 addrspace(5)*)
  declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #0
  declare float @llvm.fmuladd.f32(float, float, float) #1
  declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #0
  declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
  declare i32 @llvm.amdgcn.workitem.id.x() #1
  declare void @llvm.dbg.value(metadata, metadata, metadata) #1
  declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #1
  declare i32 @llvm.amdgcn.workitem.id.y() #1
  declare i32 @llvm.amdgcn.workitem.id.z() #1
  declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i32, i1) #0
  declare void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i64, i32, i1) #0

  attributes #0 = { argmemonly nounwind }
  attributes #1 = { nounwind readnone speculatable }
  attributes #2 = { convergent nounwind "amdgpu-dispatch-ptr" "amdgpu-flat-scratch" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="gfx900" }
  attributes #3 = { nounwind }

  !llvm.dbg.cu = !{!0}
  !llvm.module.flags = !{!2}

  !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
  !1 = !DIFile(filename: "foo.cl", directory: "/dev/null")
  !2 = !{i32 2, !"Debug Info Version", i32 3}
  !3 = !{}
  !4 = !{i32 0, i32 256}
  !5 = !DILocalVariable(name: "bar", scope: !6, file: !1, line: 102, type: !7)
  !6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 81, isLocal: false, isDefinition: true, scopeLine: 86, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
  !7 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
  !8 = !DILocation(line: 102, column: 8, scope: !6)

...
---

# CHECK: name: sched_dbg_value_crash
# CHECK: DBG_VALUE %99, $noreg, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8

name: sched_dbg_value_crash
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
liveins:
  - { reg: '$vgpr0', virtual-reg: '%0' }
  - { reg: '$vgpr1', virtual-reg: '%1' }
  - { reg: '$vgpr2', virtual-reg: '%2' }
  - { reg: '$sgpr4_sgpr5', virtual-reg: '%3' }
  - { reg: '$sgpr6_sgpr7', virtual-reg: '%4' }
fixedStack:
stack:
  - { id: 0, name: tmp5, type: default, offset: 0, size: 128, alignment: 16,
      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
      local-offset: 0, debug-info-variable: '', debug-info-expression: '',
      debug-info-location: '' }
constants:
body: |
  bb.0.bb:
    liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr32, $sgpr101

    %4:sgpr_64 = COPY $sgpr6_sgpr7
    %3:sgpr_64 = COPY $sgpr4_sgpr5
    %2:vgpr_32 = COPY $vgpr2
    %1:vgpr_32 = COPY $vgpr1
    %0:vgpr_32 = COPY $vgpr0
    %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`)
    %6:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 8, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`)
    %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 16, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`)
    %8:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 24, 0
    %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM %4, 32, 0
    %10:sreg_64_xexec = S_LOAD_DWORDX2_IMM %3, 4, 0
    %11:sreg_32_xm0 = S_LSHR_B32 %10.sub0, 16, implicit-def dead $scc
    %12:sreg_32_xm0 = S_MUL_I32 %11, %10.sub1
    %13:vgpr_32 = V_MUL_LO_I32_e64 0, %0, implicit $exec
    %14:vgpr_32 = V_MUL_LO_I32_e64 %1, %10.sub1, implicit $exec
    %15:vgpr_32 = V_ADD_CO_U32_e32 0, %13, implicit-def dead $vcc, implicit $exec
    %16:vgpr_32 = V_ADD_CO_U32_e32 0, %15, implicit-def dead $vcc, implicit $exec
    %17:vgpr_32 = IMPLICIT_DEF
    %18:sreg_64 = S_MOV_B64 0
    %19:sreg_32_xm0_xexec = IMPLICIT_DEF
    %20:vgpr_32 = V_ADD_CO_U32_e32 %19, %0, implicit-def dead $vcc, implicit $exec
    %21:vreg_64, dead %22:sreg_64 = V_MAD_I64_I32_e64 %20, 12, %7, 0, implicit $exec
    %23:vgpr_32 = GLOBAL_LOAD_DWORD %21, 4, 0, implicit $exec
    %24:vreg_64, dead %25:sreg_64 = V_MAD_I64_I32_e64 %20, 48, %8, 0, implicit $exec
    %26:vreg_128 = IMPLICIT_DEF
    undef %27.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 0, 0
    %27.sub1:sreg_64_xexec = S_MOV_B32 0
    %28:sreg_64 = S_LSHL_B64 %27, 2, implicit-def dead $scc
    undef %29.sub0:sreg_64 = S_ADD_U32 %5.sub0, %28.sub0, implicit-def $scc
    %29.sub1:sreg_64 = S_ADDC_U32 %5.sub1, %28.sub1, implicit-def dead $scc, implicit killed $scc
    undef %30.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %6, 4, 0
    %27.sub0:sreg_64_xexec = IMPLICIT_DEF
    %31:sreg_64 = S_LSHL_B64 %27, 2, implicit-def dead $scc
    %32:sreg_32_xm0 = S_ADD_U32 0, %31.sub0, implicit-def $scc
    %33:sgpr_32 = S_ADDC_U32 %5.sub1, %31.sub1, implicit-def dead $scc, implicit killed $scc
    %34:vgpr_32 = IMPLICIT_DEF
    %35:vreg_64, dead %36:sreg_64 = V_MAD_I64_I32_e64 %23, %34, 0, 0, implicit $exec
    %37:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 32, 0, implicit $exec
    undef %38.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %37.sub0, implicit $exec
    %38.sub0:vreg_64 = COPY %37.sub0
    %39:vreg_64 = V_LSHLREV_B64_e64 3, %38, implicit $exec
    undef %40.sub0:vreg_64, %41:sreg_64_xexec = V_ADD_CO_U32_e64 0, %39.sub0, 0, implicit $exec
    %42:vgpr_32 = COPY %33
    %40.sub1:vreg_64, dead %43:sreg_64_xexec = V_ADDC_U32_e64 %42, %39.sub1, %41, 0, implicit $exec
    %44:vreg_64 = GLOBAL_LOAD_DWORDX2 %40, 0, 0, implicit $exec :: (load (s64) from %ir.tmp34)
    undef %45.sub1:vreg_64 = IMPLICIT_DEF
    %45.sub0:vreg_64 = COPY %37.sub1
    %46:vreg_64 = V_LSHLREV_B64_e64 3, %45, implicit $exec
    undef %47.sub0:vreg_64, %48:sreg_64_xexec = V_ADD_CO_U32_e64 %32, %46.sub0, 0, implicit $exec
    %49:vgpr_32 = COPY %33
    %47.sub1:vreg_64, dead %50:sreg_64_xexec = V_ADDC_U32_e64 %49, %46.sub1, %48, 0, implicit $exec
    %51:vreg_64 = IMPLICIT_DEF
    undef %52.sub0:vreg_64 = GLOBAL_LOAD_DWORD %35, 40, 0, implicit $exec :: (load (s32) from %ir.18 + 8)
    %52.sub1:vreg_64 = IMPLICIT_DEF
    %53:vreg_64 = V_LSHLREV_B64_e64 3, %52, implicit $exec
    undef %54.sub0:vreg_64, %55:sreg_64_xexec = V_ADD_CO_U32_e64 0, %53.sub0, 0, implicit $exec
    %56:vgpr_32 = COPY %33
    %54.sub1:vreg_64, dead %57:sreg_64_xexec = V_ADDC_U32_e64 0, %53.sub1, %55, 0, implicit $exec
    %58:vreg_64 = IMPLICIT_DEF
    %30.sub1:sreg_64_xexec = IMPLICIT_DEF
    %59:sreg_64 = IMPLICIT_DEF
    %60:sreg_32_xm0 = S_ADD_U32 %5.sub0, %59.sub0, implicit-def $scc
    %61:sgpr_32 = S_ADDC_U32 %5.sub1, %59.sub1, implicit-def dead $scc, implicit killed $scc
    %62:vreg_64 = GLOBAL_LOAD_DWORDX2 %35, 0, 0, implicit $exec :: (load (s64) from %ir.20, align 4)
    undef %63.sub1:vreg_64 = V_ASHRREV_I32_e32 31, %62.sub0, implicit $exec
    %63.sub0:vreg_64 = COPY %62.sub0
    %64:vreg_64 = IMPLICIT_DEF
    undef %65.sub0:vreg_64, %66:sreg_64_xexec = V_ADD_CO_U32_e64 %60, %64.sub0, 0, implicit $exec
    %67:vgpr_32 = COPY %61
    %65.sub1:vreg_64, dead %68:sreg_64_xexec = V_ADDC_U32_e64 %67, %64.sub1, %66, 0, implicit $exec
    %69:vreg_128 = GLOBAL_LOAD_DWORDX4 %65, 0, 0, implicit $exec :: (load (s128) from %ir.tmp58)
    undef %70.sub1:vreg_64 = IMPLICIT_DEF
    %70.sub0:vreg_64 = IMPLICIT_DEF
    %71:vreg_64 = IMPLICIT_DEF
    undef %72.sub0:vreg_64, %73:sreg_64_xexec = V_ADD_CO_U32_e64 %60, %71.sub0, 0, implicit $exec
    %74:vgpr_32 = COPY %61
    %72.sub1:vreg_64, dead %75:sreg_64_xexec = V_ADDC_U32_e64 0, %71.sub1, %73, 0, implicit $exec
    %76:vreg_128 = GLOBAL_LOAD_DWORDX4 %72, 0, 0, implicit $exec
    %77:vgpr_32 = IMPLICIT_DEF
    %78:vgpr_32 = IMPLICIT_DEF
    %79:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %77, implicit $mode, implicit $exec
    %80:vgpr_32 = IMPLICIT_DEF
    %81:vgpr_32 = IMPLICIT_DEF
    %84:vgpr_32 = IMPLICIT_DEF
    BUFFER_STORE_DWORD_OFFEN %84, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, 0, implicit $exec
    BUFFER_STORE_DWORD_OFFEN %81, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, 0, implicit $exec
    BUFFER_STORE_DWORD_OFFEN %80, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, 0, implicit $exec
    BUFFER_STORE_DWORD_OFFEN %78, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, 0, implicit $exec
    %85:vgpr_32 = IMPLICIT_DEF
    %86:vgpr_32 = IMPLICIT_DEF
    %87:vgpr_32 = IMPLICIT_DEF
    %88:vgpr_32 = IMPLICIT_DEF
    %90:vgpr_32 = IMPLICIT_DEF
    %91:vgpr_32, dead %92:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %90, 0, %90, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
    %95:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, undef %93:vgpr_32, 0, 0, implicit $mode, implicit $exec
    %96:vgpr_32, %97:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 1065353216, 0, %90, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
    %98:vgpr_32 = IMPLICIT_DEF
    %99:vgpr_32 = IMPLICIT_DEF
    %100:vgpr_32 = IMPLICIT_DEF
    %101:vgpr_32 = IMPLICIT_DEF
    %102:vgpr_32 = IMPLICIT_DEF
    %103:vgpr_32 = IMPLICIT_DEF
    %104:vgpr_32 = IMPLICIT_DEF
    %105:vgpr_32 = IMPLICIT_DEF
    %106:vgpr_32, dead %107:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, %90, 0, %90, 0, %105, 0, 0, implicit $mode, implicit $exec
    %108:vgpr_32 = nofpexcept V_RCP_F32_e32 0, implicit $mode, implicit $exec
    %109:vgpr_32 = IMPLICIT_DEF
    %110:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
    %111:vgpr_32, %112:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
    %113:vgpr_32 = nofpexcept V_MUL_F32_e32 0, %110, implicit $mode, implicit $exec
    %114:vgpr_32 = IMPLICIT_DEF
    %115:vgpr_32 = IMPLICIT_DEF
    %116:vgpr_32 = IMPLICIT_DEF
    $vcc = IMPLICIT_DEF
    %117:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, %116, 0, %110, 0, %115, 0, 0, implicit killed $vcc, implicit $mode, implicit $exec
    %118:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, %117, 0, %90, 0, %105, 0, 0, implicit $mode, implicit $exec
    %119:vgpr_32 = IMPLICIT_DEF
    %120:vgpr_32 = IMPLICIT_DEF
    %121:vgpr_32 = IMPLICIT_DEF
    %122:vgpr_32 = IMPLICIT_DEF
    %123:vgpr_32 = IMPLICIT_DEF
    %124:vgpr_32 = IMPLICIT_DEF
    %125:vgpr_32 = IMPLICIT_DEF
    %126:vgpr_32 = IMPLICIT_DEF
    DBG_VALUE %103, _, !5, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !8
    ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
    %127:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc
    $sgpr4 = COPY $sgpr101
    $vgpr0 = COPY %124
    $vgpr1_vgpr2 = IMPLICIT_DEF
    $vgpr3 = COPY %126
    dead $sgpr30_sgpr31 = SI_CALL %127, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit $vgpr1_vgpr2, implicit killed $vgpr3
    ADJCALLSTACKDOWN 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32
    %128:vreg_64, dead %129:sreg_64 = V_MAD_I64_I32_e64 %20, %34, 0, 0, implicit $exec
    S_ENDPGM 0

...