
AMDGPU/GlobalISel: Remove selection of MAD/MAC when not available

Add code to respect mad-mac-f32-insts target feature.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D81990
Carl Ritson 2020-06-19 10:29:47 +09:00
parent 4455c9ee9e
commit d144b94e41
4 changed files with 241 additions and 120 deletions
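
As a quick end-to-end sanity check, the following hypothetical IR snippet (not one of the files touched by this commit) can be run through llc for a subtarget with and without mad-mac-f32-insts; every flag used here also appears in the updated RUN lines below. On gfx1030, which lacks the feature, the output should contain no v_mad_f32 or v_mac_f32, while gfx1010 may still use them.

; Hypothetical standalone example, not part of this commit.
; Assumed invocations:
;   llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math-f32=preserve-sign %s -o -
;   llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -denormal-fp-math-f32=preserve-sign %s -o -
define float @fmad_example(float %a, float %b, float %c) {
  ; llvm.fmuladd requests a fused or contracted multiply-add.
  %r = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  ret float %r
}
declare float @llvm.fmuladd.f32(float, float, float)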


@@ -612,10 +612,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
   // Whether this is legal depends on the floating point mode for the function.
   auto &FMad = getActionDefinitionsBuilder(G_FMAD);
-  if (ST.hasMadF16())
+  if (ST.hasMadF16() && ST.hasMadMacF32Insts())
     FMad.customFor({S32, S16});
-  else
+  else if (ST.hasMadMacF32Insts())
     FMad.customFor({S32});
+  else if (ST.hasMadF16())
+    FMad.customFor({S16});
   FMad.scalarize(0)
       .lower();


@@ -1,7 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s
 # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX7 %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX101 %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1030 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX103 %s
 ---
 name: test_fmad_s32_flush
@@ -26,12 +27,19 @@ body: |
 ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX7: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
 ; GFX7: $vgpr0 = COPY [[FMAD]](s32)
-; GFX10-LABEL: name: test_fmad_s32_flush
-; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
-; GFX10: $vgpr0 = COPY [[FMAD]](s32)
+; GFX101-LABEL: name: test_fmad_s32_flush
+; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
+; GFX101: $vgpr0 = COPY [[FMAD]](s32)
+; GFX103-LABEL: name: test_fmad_s32_flush
+; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
+; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
+; GFX103: $vgpr0 = COPY [[FADD]](s32)
 %0:_(s32) = COPY $vgpr0
 %1:_(s32) = COPY $vgpr1
 %2:_(s32) = COPY $vgpr2
@@ -62,12 +70,19 @@ body: |
 ; GFX7: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
 ; GFX7: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
 ; GFX7: $vgpr0 = COPY [[FMAD]](s32)
-; GFX10-LABEL: name: test_fmad_s32_flags_flush
-; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-; GFX10: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
-; GFX10: $vgpr0 = COPY [[FMAD]](s32)
+; GFX101-LABEL: name: test_fmad_s32_flags_flush
+; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+; GFX101: [[FMAD:%[0-9]+]]:_(s32) = nnan G_FMAD [[COPY]], [[COPY1]], [[COPY2]]
+; GFX101: $vgpr0 = COPY [[FMAD]](s32)
+; GFX103-LABEL: name: test_fmad_s32_flags_flush
+; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+; GFX103: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
+; GFX103: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
+; GFX103: $vgpr0 = COPY [[FADD]](s32)
 %0:_(s32) = COPY $vgpr0
 %1:_(s32) = COPY $vgpr1
 %2:_(s32) = COPY $vgpr2
@@ -108,17 +123,30 @@ body: |
 ; GFX7: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
 ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
 ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
-; GFX10-LABEL: name: test_fmad_v2s32_flush
-; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
-; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]]
-; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
-; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
-; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+; GFX101-LABEL: name: test_fmad_v2s32_flush
+; GFX101: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+; GFX101: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+; GFX101: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+; GFX101: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
+; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV2]], [[UV4]]
+; GFX101: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV3]], [[UV5]]
+; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32)
+; GFX101: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+; GFX103-LABEL: name: test_fmad_v2s32_flush
+; GFX103: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+; GFX103: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+; GFX103: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+; GFX103: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
+; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
+; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]]
+; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
+; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
+; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
+; GFX103: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5
@@ -161,18 +189,33 @@ body: |
 ; GFX7: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
 ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
 ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
-; GFX10-LABEL: name: test_fmad_v3s32_flush
-; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
-; GFX10: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
-; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
-; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]]
-; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]]
-; GFX10: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
-; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
-; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+; GFX101-LABEL: name: test_fmad_v3s32_flush
+; GFX101: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+; GFX101: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+; GFX101: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+; GFX101: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+; GFX101: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV3]], [[UV6]]
+; GFX101: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV4]], [[UV7]]
+; GFX101: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV5]], [[UV8]]
+; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32)
+; GFX101: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+; GFX103-LABEL: name: test_fmad_v3s32_flush
+; GFX103: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+; GFX103: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+; GFX103: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+; GFX103: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+; GFX103: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
+; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]]
+; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
+; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
+; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
+; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
+; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
+; GFX103: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
 %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
 %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
@@ -217,19 +260,36 @@ body: |
 ; GFX7: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
 ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
 ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
-; GFX10-LABEL: name: test_fmad_v4s32_flush
-; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
-; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
-; GFX10: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]]
-; GFX10: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]]
-; GFX10: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]]
-; GFX10: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
-; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
-; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+; GFX101-LABEL: name: test_fmad_v4s32_flush
+; GFX101: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX101: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX101: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+; GFX101: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
+; GFX101: [[FMAD:%[0-9]+]]:_(s32) = G_FMAD [[UV]], [[UV4]], [[UV8]]
+; GFX101: [[FMAD1:%[0-9]+]]:_(s32) = G_FMAD [[UV1]], [[UV5]], [[UV9]]
+; GFX101: [[FMAD2:%[0-9]+]]:_(s32) = G_FMAD [[UV2]], [[UV6]], [[UV10]]
+; GFX101: [[FMAD3:%[0-9]+]]:_(s32) = G_FMAD [[UV3]], [[UV7]], [[UV11]]
+; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMAD]](s32), [[FMAD1]](s32), [[FMAD2]](s32), [[FMAD3]](s32)
+; GFX101: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+; GFX103-LABEL: name: test_fmad_v4s32_flush
+; GFX103: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX103: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX103: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+; GFX103: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
+; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
+; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
+; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
+; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]]
+; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
+; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]]
+; GFX103: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
+; GFX103: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
+; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
+; GFX103: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
 %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
@@ -262,13 +322,20 @@ body: |
 ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
 ; GFX7: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
 ; GFX7: $vgpr0 = COPY [[FADD]](s32)
-; GFX10-LABEL: name: test_fmad_s32_denorm
-; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
-; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
-; GFX10: $vgpr0 = COPY [[FADD]](s32)
+; GFX101-LABEL: name: test_fmad_s32_denorm
+; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
+; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
+; GFX101: $vgpr0 = COPY [[FADD]](s32)
+; GFX103-LABEL: name: test_fmad_s32_denorm
+; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]]
+; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
+; GFX103: $vgpr0 = COPY [[FADD]](s32)
 %0:_(s32) = COPY $vgpr0
 %1:_(s32) = COPY $vgpr1
 %2:_(s32) = COPY $vgpr2
@@ -301,13 +368,20 @@ body: |
 ; GFX7: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
 ; GFX7: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
 ; GFX7: $vgpr0 = COPY [[FADD]](s32)
-; GFX10-LABEL: name: test_fmad_s32_flags_denorm
-; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
-; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
-; GFX10: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
-; GFX10: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
-; GFX10: $vgpr0 = COPY [[FADD]](s32)
+; GFX101-LABEL: name: test_fmad_s32_flags_denorm
+; GFX101: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; GFX101: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+; GFX101: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+; GFX101: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
+; GFX101: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
+; GFX101: $vgpr0 = COPY [[FADD]](s32)
+; GFX103-LABEL: name: test_fmad_s32_flags_denorm
+; GFX103: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+; GFX103: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+; GFX103: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+; GFX103: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[COPY1]]
+; GFX103: [[FADD:%[0-9]+]]:_(s32) = nnan G_FADD [[FMUL]], [[COPY2]]
+; GFX103: $vgpr0 = COPY [[FADD]](s32)
 %0:_(s32) = COPY $vgpr0
 %1:_(s32) = COPY $vgpr1
 %2:_(s32) = COPY $vgpr2
@@ -352,19 +426,32 @@ body: |
 ; GFX7: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
 ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
 ; GFX7: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
-; GFX10-LABEL: name: test_fmad_v2s32_denorm
-; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
-; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
-; GFX10: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
-; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
-; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
-; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]]
-; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
-; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
-; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
-; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+; GFX101-LABEL: name: test_fmad_v2s32_denorm
+; GFX101: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+; GFX101: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+; GFX101: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+; GFX101: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
+; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
+; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]]
+; GFX101: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
+; GFX101: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
+; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
+; GFX101: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
+; GFX103-LABEL: name: test_fmad_v2s32_denorm
+; GFX103: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+; GFX103: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
+; GFX103: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5
+; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+; GFX103: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
+; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>)
+; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV2]]
+; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV4]]
+; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV3]]
+; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV5]]
+; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32)
+; GFX103: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
 %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3
 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5
@@ -413,21 +500,36 @@ body: |
 ; GFX7: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
 ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
 ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
-; GFX10-LABEL: name: test_fmad_v3s32_denorm
-; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
-; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
-; GFX10: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
-; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
-; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
-; GFX10: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
-; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
-; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]]
-; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
-; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
-; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
-; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
-; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
-; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+; GFX101-LABEL: name: test_fmad_v3s32_denorm
+; GFX101: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+; GFX101: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+; GFX101: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+; GFX101: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+; GFX101: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
+; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]]
+; GFX101: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
+; GFX101: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
+; GFX101: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
+; GFX101: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
+; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
+; GFX101: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
+; GFX103-LABEL: name: test_fmad_v3s32_denorm
+; GFX103: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
+; GFX103: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
+; GFX103: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
+; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
+; GFX103: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
+; GFX103: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>)
+; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV3]]
+; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV6]]
+; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV4]]
+; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV7]]
+; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV5]]
+; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV8]]
+; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32)
+; GFX103: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>)
 %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5
 %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8
@@ -480,23 +582,40 @@ body: |
 ; GFX7: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
 ; GFX7: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
 ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
-; GFX10-LABEL: name: test_fmad_v4s32_denorm
-; GFX10: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
-; GFX10: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
-; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-; GFX10: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
-; GFX10: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
-; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
-; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
-; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
-; GFX10: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]]
-; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
-; GFX10: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]]
-; GFX10: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
-; GFX10: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
-; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
-; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+; GFX101-LABEL: name: test_fmad_v4s32_denorm
+; GFX101: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX101: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX101: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX101: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+; GFX101: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+; GFX101: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
+; GFX101: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
+; GFX101: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
+; GFX101: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
+; GFX101: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]]
+; GFX101: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
+; GFX101: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]]
+; GFX101: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
+; GFX101: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
+; GFX101: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
+; GFX101: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+; GFX103-LABEL: name: test_fmad_v4s32_denorm
+; GFX103: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+; GFX103: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
+; GFX103: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+; GFX103: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+; GFX103: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
+; GFX103: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>)
+; GFX103: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[UV4]]
+; GFX103: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV8]]
+; GFX103: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[UV5]]
+; GFX103: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL1]], [[UV9]]
+; GFX103: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[UV6]]
+; GFX103: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL2]], [[UV10]]
+; GFX103: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[UV3]], [[UV7]]
+; GFX103: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[UV11]]
+; GFX103: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32), [[FADD2]](s32), [[FADD3]](s32)
+; GFX103: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
 %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
 %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11


@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
@@ -3147,7 +3147,7 @@ define i64 @v_udiv_i64_24bit(i64 %num, i64 %den) {
 ; CGP-NEXT: v_rcp_f32_e32 v2, v1
 ; CGP-NEXT: v_mul_f32_e32 v2, v0, v2
 ; CGP-NEXT: v_trunc_f32_e32 v2, v2
-; CGP-NEXT: v_fma_f32 v0, -v2, v1, v0
+; CGP-NEXT: v_mad_f32 v0, -v2, v1, v0
 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v1
 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
@@ -3444,9 +3444,9 @@ define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT: v_mul_f32_e32 v6, v2, v6
 ; CGP-NEXT: v_trunc_f32_e32 v5, v5
 ; CGP-NEXT: v_trunc_f32_e32 v6, v6
-; CGP-NEXT: v_fma_f32 v0, -v5, v3, v0
+; CGP-NEXT: v_mad_f32 v0, -v5, v3, v0
 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
-; CGP-NEXT: v_fma_f32 v2, -v6, v4, v2
+; CGP-NEXT: v_mad_f32 v2, -v6, v4, v2
 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v3
 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]


@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s
-; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
@@ -3116,7 +3116,7 @@ define i64 @v_urem_i64_24bit(i64 %num, i64 %den) {
 ; CGP-NEXT: v_rcp_f32_e32 v4, v3
 ; CGP-NEXT: v_mul_f32_e32 v4, v2, v4
 ; CGP-NEXT: v_trunc_f32_e32 v4, v4
-; CGP-NEXT: v_fma_f32 v2, -v4, v3, v2
+; CGP-NEXT: v_mad_f32 v2, -v4, v3, v2
 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, v3
 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
@@ -3411,9 +3411,9 @@ define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT: v_mul_f32_e32 v10, v7, v10
 ; CGP-NEXT: v_trunc_f32_e32 v9, v9
 ; CGP-NEXT: v_trunc_f32_e32 v10, v10
-; CGP-NEXT: v_fma_f32 v5, -v9, v6, v5
+; CGP-NEXT: v_mad_f32 v5, -v9, v6, v5
 ; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9
-; CGP-NEXT: v_fma_f32 v7, -v10, v8, v7
+; CGP-NEXT: v_mad_f32 v7, -v10, v8, v7
 ; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10
 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v5|, v6
 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5]