mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
AMDGPU/GlobalISel: Remove selection of MAD/MAC when not available
Add code to respect the mad-mac-f32-insts target feature.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D81990
This commit is contained in:
parent
4455c9ee9e
commit
d144b94e41
@ -612,10 +612,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
||||
|
||||
// Whether this is legal depends on the floating point mode for the function.
|
||||
auto &FMad = getActionDefinitionsBuilder(G_FMAD);
|
||||
if (ST.hasMadF16())
|
||||
if (ST.hasMadF16() && ST.hasMadMacF32Insts())
|
||||
FMad.customFor({S32, S16});
|
||||
else
|
||||
else if (ST.hasMadMacF32Insts())
|
||||
FMad.customFor({S32});
|
||||
else if (ST.hasMadF16())
|
||||
FMad.customFor({S16});
|
||||
FMad.scalarize(0)
|
||||
.lower();
|
||||
|
||||
|
@ -1,7 +1,8 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX7 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX10 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX101 %s
|
||||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1030 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX103 %s
|
||||
|
||||
---
|
||||
name: test_fmad_s32_flush
|
||||
@ -26,12 +27,19 @@ body: |
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX7: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; GFX7: $vgpr0 = COPY [[FMAD]](s32)
|
||||
; GFX10-LABEL: name: test_fmad_s32_flush
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; GFX10: $vgpr0 = COPY [[FMAD]](s32)
|
||||
; GFX101-LABEL: name: test_fmad_s32_flush
|
||||
; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; GFX101: $vgpr0 = COPY [[FMAD]](s32)
|
||||
; GFX103-LABEL: name: test_fmad_s32_flush
|
||||
; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX103: $vgpr0 = COPY [[FADD]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
@ -62,12 +70,19 @@ body: |
|
||||
; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX7: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; GFX7: $vgpr0 = COPY [[FMAD]](s32)
|
||||
; GFX10-LABEL: name: test_fmad_s32_flags_flush
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX10: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; GFX10: $vgpr0 = COPY [[FMAD]](s32)
|
||||
; GFX101-LABEL: name: test_fmad_s32_flags_flush
|
||||
; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX101: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
|
||||
; GFX101: $vgpr0 = COPY [[FMAD]](s32)
|
||||
; GFX103-LABEL: name: test_fmad_s32_flags_flush
|
||||
; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX103: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX103: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX103: $vgpr0 = COPY [[FADD]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
@ -108,17 +123,30 @@ body: |
|
||||
; GFX7: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX10-LABEL: name: test_fmad_v2s32_flush
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]]
|
||||
; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
|
||||
; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX101-LABEL: name: test_fmad_v2s32_flush
|
||||
; GFX101: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX101: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; GFX101: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX101: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]]
|
||||
; GFX101: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
|
||||
; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
|
||||
; GFX101: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX103-LABEL: name: test_fmad_v2s32_flush
|
||||
; GFX103: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX103: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; GFX103: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX103: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
|
||||
; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]]
|
||||
; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
|
||||
; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
|
||||
; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
|
||||
; GFX103: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
@ -161,18 +189,33 @@ body: |
|
||||
; GFX7: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
; GFX10-LABEL: name: test_fmad_v3s32_flush
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]]
|
||||
; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]]
|
||||
; GFX10: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
|
||||
; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
; GFX101-LABEL: name: test_fmad_v3s32_flush
|
||||
; GFX101: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GFX101: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; GFX101: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; GFX101: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; GFX101: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]]
|
||||
; GFX101: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]]
|
||||
; GFX101: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
|
||||
; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
|
||||
; GFX101: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
; GFX103-LABEL: name: test_fmad_v3s32_flush
|
||||
; GFX103: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GFX103: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; GFX103: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; GFX103: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; GFX103: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
|
||||
; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]]
|
||||
; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
|
||||
; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
|
||||
; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
|
||||
; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
|
||||
; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
|
||||
; GFX103: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
%2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
@ -217,19 +260,36 @@ body: |
|
||||
; GFX7: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
; GFX10-LABEL: name: test_fmad_v4s32_flush
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]]
|
||||
; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]]
|
||||
; GFX10: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]]
|
||||
; GFX10: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
|
||||
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
; GFX101-LABEL: name: test_fmad_v4s32_flush
|
||||
; GFX101: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX101: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GFX101: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; GFX101: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]]
|
||||
; GFX101: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]]
|
||||
; GFX101: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]]
|
||||
; GFX101: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
|
||||
; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
|
||||
; GFX101: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
; GFX103-LABEL: name: test_fmad_v4s32_flush
|
||||
; GFX103: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX103: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GFX103: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; GFX103: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
|
||||
; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
|
||||
; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
|
||||
; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]]
|
||||
; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
|
||||
; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]]
|
||||
; GFX103: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
|
||||
; GFX103: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
|
||||
; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
|
||||
; GFX103: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
%1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
%2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
@ -262,13 +322,20 @@ body: |
|
||||
; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX7: $vgpr0 = COPY [[FADD]](s32)
|
||||
; GFX10-LABEL: name: test_fmad_s32_denorm
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX10: $vgpr0 = COPY [[FADD]](s32)
|
||||
; GFX101-LABEL: name: test_fmad_s32_denorm
|
||||
; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX101: $vgpr0 = COPY [[FADD]](s32)
|
||||
; GFX103-LABEL: name: test_fmad_s32_denorm
|
||||
; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX103: $vgpr0 = COPY [[FADD]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
@ -301,13 +368,20 @@ body: |
|
||||
; GFX7: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX7: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX7: $vgpr0 = COPY [[FADD]](s32)
|
||||
; GFX10-LABEL: name: test_fmad_s32_flags_denorm
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX10: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX10: $vgpr0 = COPY [[FADD]](s32)
|
||||
; GFX101-LABEL: name: test_fmad_s32_flags_denorm
|
||||
; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX101: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX101: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX101: $vgpr0 = COPY [[FADD]](s32)
|
||||
; GFX103-LABEL: name: test_fmad_s32_flags_denorm
|
||||
; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||
; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||
; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
|
||||
; GFX103: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
|
||||
; GFX103: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
|
||||
; GFX103: $vgpr0 = COPY [[FADD]](s32)
|
||||
%0:_(s32) = COPY $vgpr0
|
||||
%1:_(s32) = COPY $vgpr1
|
||||
%2:_(s32) = COPY $vgpr2
|
||||
@ -352,19 +426,32 @@ body: |
|
||||
; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX10-LABEL: name: test_fmad_v2s32_denorm
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
|
||||
; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]]
|
||||
; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
|
||||
; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
|
||||
; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX101-LABEL: name: test_fmad_v2s32_denorm
|
||||
; GFX101: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX101: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; GFX101: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX101: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
|
||||
; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]]
|
||||
; GFX101: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
|
||||
; GFX101: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
|
||||
; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
|
||||
; GFX101: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
; GFX103-LABEL: name: test_fmad_v2s32_denorm
|
||||
; GFX103: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
; GFX103: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
; GFX103: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
|
||||
; GFX103: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
|
||||
; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
|
||||
; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
|
||||
; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]]
|
||||
; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
|
||||
; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
|
||||
; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
|
||||
; GFX103: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
|
||||
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
|
||||
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
|
||||
%2:_(<2 x s32>) = COPY $vgpr4_vgpr5
|
||||
@ -413,21 +500,36 @@ body: |
|
||||
; GFX7: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
; GFX10-LABEL: name: test_fmad_v3s32_denorm
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
|
||||
; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]]
|
||||
; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
|
||||
; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
|
||||
; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
|
||||
; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
|
||||
; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
; GFX101-LABEL: name: test_fmad_v3s32_denorm
|
||||
; GFX101: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GFX101: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; GFX101: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; GFX101: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; GFX101: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
|
||||
; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]]
|
||||
; GFX101: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
|
||||
; GFX101: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
|
||||
; GFX101: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
|
||||
; GFX101: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
|
||||
; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
|
||||
; GFX101: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
; GFX103-LABEL: name: test_fmad_v3s32_denorm
|
||||
; GFX103: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
; GFX103: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
; GFX103: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
|
||||
; GFX103: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
|
||||
; GFX103: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
|
||||
; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
|
||||
; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]]
|
||||
; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
|
||||
; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
|
||||
; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
|
||||
; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
|
||||
; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
|
||||
; GFX103: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
|
||||
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
|
||||
%1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
|
||||
%2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
|
||||
@ -480,23 +582,40 @@ body: |
|
||||
; GFX7: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
|
||||
; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
|
||||
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
; GFX10-LABEL: name: test_fmad_v4s32_denorm
|
||||
; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
|
||||
; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
|
||||
; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
|
||||
; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]]
|
||||
; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
|
||||
; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]]
|
||||
; GFX10: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
|
||||
; GFX10: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
|
||||
; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
|
||||
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
; GFX101-LABEL: name: test_fmad_v4s32_denorm
|
||||
; GFX101: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX101: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GFX101: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; GFX101: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
|
||||
; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
|
||||
; GFX101: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
|
||||
; GFX101: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]]
|
||||
; GFX101: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
|
||||
; GFX101: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]]
|
||||
; GFX101: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
|
||||
; GFX101: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
|
||||
; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
|
||||
; GFX101: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
; GFX103-LABEL: name: test_fmad_v4s32_denorm
|
||||
; GFX103: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
; GFX103: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
; GFX103: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
|
||||
; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
|
||||
; GFX103: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
|
||||
; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
|
||||
; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
|
||||
; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
|
||||
; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]]
|
||||
; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
|
||||
; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]]
|
||||
; GFX103: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
|
||||
; GFX103: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
|
||||
; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
|
||||
; GFX103: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
|
||||
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
|
||||
%1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
|
||||
%2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
|
||||
|
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
|
||||
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
|
||||
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
|
||||
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
|
||||
|
||||
; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
|
||||
|
||||
@ -3147,7 +3147,7 @@ define i64 @v_udiv_i64_24bit(i64 %num, i64 %den) {
|
||||
; CGP-NEXT: v_rcp_f32_e32 v2, v1
|
||||
; CGP-NEXT: v_mul_f32_e32 v2, v0, v2
|
||||
; CGP-NEXT: v_trunc_f32_e32 v2, v2
|
||||
; CGP-NEXT: v_fma_f32 v0, -v2, v1, v0
|
||||
; CGP-NEXT: v_mad_f32 v0, -v2, v1, v0
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
|
||||
; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v1
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
|
||||
@ -3444,9 +3444,9 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
|
||||
; CGP-NEXT: v_mul_f32_e32 v6, v2, v6
|
||||
; CGP-NEXT: v_trunc_f32_e32 v5, v5
|
||||
; CGP-NEXT: v_trunc_f32_e32 v6, v6
|
||||
; CGP-NEXT: v_fma_f32 v0, -v5, v3, v0
|
||||
; CGP-NEXT: v_mad_f32 v0, -v5, v3, v0
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
|
||||
; CGP-NEXT: v_fma_f32 v2, -v6, v4, v2
|
||||
; CGP-NEXT: v_mad_f32 v2, -v6, v4, v2
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
|
||||
; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v3
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
|
||||
|
@ -1,6 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
|
||||
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
|
||||
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
|
||||
; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
|
||||
|
||||
; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
|
||||
|
||||
@ -3116,7 +3116,7 @@ define i64 @v_urem_i64_24bit(i64 %num, i64 %den) {
|
||||
; CGP-NEXT: v_rcp_f32_e32 v4, v3
|
||||
; CGP-NEXT: v_mul_f32_e32 v4, v2, v4
|
||||
; CGP-NEXT: v_trunc_f32_e32 v4, v4
|
||||
; CGP-NEXT: v_fma_f32 v2, -v4, v3, v2
|
||||
; CGP-NEXT: v_mad_f32 v2, -v4, v3, v2
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
|
||||
; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, v3
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
|
||||
@ -3411,9 +3411,9 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
|
||||
; CGP-NEXT: v_mul_f32_e32 v10, v7, v10
|
||||
; CGP-NEXT: v_trunc_f32_e32 v9, v9
|
||||
; CGP-NEXT: v_trunc_f32_e32 v10, v10
|
||||
; CGP-NEXT: v_fma_f32 v5, -v9, v6, v5
|
||||
; CGP-NEXT: v_mad_f32 v5, -v9, v6, v5
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9
|
||||
; CGP-NEXT: v_fma_f32 v7, -v10, v8, v7
|
||||
; CGP-NEXT: v_mad_f32 v7, -v10, v8, v7
|
||||
; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10
|
||||
; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v5|, v6
|
||||
; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5]
|
||||
|
Loading…
Reference in New Issue
Block a user