
[GlobalISel] Add X,Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z

Add a combiner helper that rewrites a G_UNMERGE, where every destination lane
except the first is dead, into a G_TRUNC of the source.
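For illustration, a minimal before/after sketch in generic MIR (the register
names are invented for this sketch, not taken from the patch):

# Before: only %1 is used; %2 is dead.
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(s64)
# After the combine:
%1:_(s32) = G_TRUNC %0(s64)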

Differential Revision: https://reviews.llvm.org/D87174
Quentin Colombet 2020-09-04 17:09:38 -07:00
parent a1f6a63fbc
commit b9fe935943
9 changed files with 1581 additions and 100 deletions

@@ -258,6 +258,10 @@ public:
bool applyCombineUnmergeConstant(MachineInstr &MI,
SmallVectorImpl<APInt> &Csts);
/// Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
bool applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
/// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg);
bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg);

@@ -421,6 +421,14 @@ def unmerge_cst : GICombineRule<
(apply [{ return Helper.applyCombineUnmergeConstant(*${d}, ${info}); }])
>;
// Transform x,y<dead> = unmerge z -> x = trunc z.
def unmerge_dead_to_trunc : GICombineRule<
(defs root:$d),
(match (wip_match_opcode G_UNMERGE_VALUES): $d,
[{ return Helper.matchCombineUnmergeWithDeadLanesToTrunc(*${d}); }]),
(apply [{ return Helper.applyCombineUnmergeWithDeadLanesToTrunc(*${d}); }])
>;
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -452,4 +460,4 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
width_reduction_combines, select_combines,
known_bits_simplifications, ext_ext_fold,
not_cmp_fold, opt_brcond_by_inverting_cond,
unmerge_merge, fabs_fabs_fold, unmerge_cst]>;
unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc]>;

@@ -1654,6 +1654,39 @@ bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
return true;
}
bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  // Check that all the lanes are dead except the first one.
  for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
    if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
      return false;
  }
  return true;
}

bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
  Builder.setInstrAndDebugLoc(MI);
  Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
  // Truncating a vector is going to truncate every single lane,
  // whereas we want the full lowbits.
  // Do the operation on a scalar instead.
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    SrcReg =
        Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT Dst0Ty = MRI.getType(Dst0Reg);
  if (Dst0Ty.isVector()) {
    auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
    Builder.buildCast(Dst0Reg, MIB);
  } else
    Builder.buildTrunc(Dst0Reg, SrcReg);
  MI.eraseFromParent();
  return true;
}
bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
unsigned TargetShiftSize,
unsigned &ShiftVal) {
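To make the scalar detour above concrete, here is a sketch of the vector-source
case (invented register names, matching the pattern exercised by the MIR tests
below): an unmerge of a <2 x s32> value whose only live piece is the first s16
becomes a bitcast to s64 followed by a trunc.

# Before: only %1 is live.
%1:_(s16), %2:_(s16), %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %0(<2 x s32>)
# After: cast the source to a scalar, then truncate.
%5:_(s64) = G_BITCAST %0(<2 x s32>)
%1:_(s16) = G_TRUNC %5(s64)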

@@ -292,3 +292,80 @@ body: |
$h2 = COPY %3(s16)
$h3 = COPY %4(s16)
...
# Transform unmerge into trunc when only the first definition is live.
---
name: test_combine_unmerge_dead_to_trunc
body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
; CHECK: $h0 = COPY [[TRUNC]](s16)
%0:_(s64) = COPY $x0
%1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64)
$h0 = COPY %1(s16)
...
# Don't transform unmerge into trunc when middle lanes are live.
---
name: test_dont_combine_unmerge_dead_to_trunc
body: |
bb.1:
; CHECK-LABEL: name: test_dont_combine_unmerge_dead_to_trunc
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: $h0 = COPY [[UV2]](s16)
%0:_(s64) = COPY $x0
%1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(s64)
$h0 = COPY %3(s16)
...
# Transform unmerge into trunc when only the first definition is live, even
# if the input and output types are vectors.
---
name: test_combine_unmerge_dead_to_trunc_vec_in_n_out
body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in_n_out
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>)
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[TRUNC]](s32)
; CHECK: $w0 = COPY [[BITCAST1]](<2 x s16>)
%0:_(<2 x s32>) = COPY $x0
%1:_(<2 x s16>),%2:_(<2 x s16>) = G_UNMERGE_VALUES %0(<2 x s32>)
$w0 = COPY %1(<2 x s16>)
...
# Transform unmerge into trunc when only the first definition is live, even
# if the input type is a vector.
---
name: test_combine_unmerge_dead_to_trunc_vec_in
body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>)
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64)
; CHECK: $h0 = COPY [[TRUNC]](s16)
%0:_(<2 x s32>) = COPY $x0
%1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(<2 x s32>)
$h0 = COPY %1(s16)
...
# Transform unmerge into trunc when only the first definition is live, even
# if the output type is a vector.
---
name: test_combine_unmerge_dead_to_trunc_vec_out
body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_out
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[TRUNC]](s32)
; CHECK: $w0 = COPY [[BITCAST]](<2 x s16>)
%0:_(s64) = COPY $x0
%1:_(<2 x s16>),%2:_(<2 x s16>) = G_UNMERGE_VALUES %0(s64)
$w0 = COPY %1(<2 x s16>)
...

@@ -12,9 +12,9 @@ body: |
; CHECK-LABEL: name: narrow_shl_s64_32_s64amt
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[UV]](s32)
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[TRUNC]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = G_CONSTANT i64 32
@@ -32,9 +32,9 @@ body: |
; CHECK-LABEL: name: narrow_shl_s64_32
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[UV]](s32)
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[TRUNC]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s32) = G_CONSTANT i32 32
@@ -52,9 +52,9 @@ body: |
; CHECK-LABEL: name: narrow_shl_s64_33
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[SHL]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
@@ -93,9 +93,9 @@ body: |
; CHECK-LABEL: name: narrow_shl_s64_63
; CHECK: liveins: $vgpr0_vgpr1
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[C]](s32)
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32)
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[SHL]](s32)
; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)

@@ -110,15 +110,16 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
; UNPACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>)
; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
; UNPACKED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>)
; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; UNPACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; UNPACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32)
; UNPACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32)
; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
; UNPACKED: S_ENDPGM 0
@@ -140,9 +141,29 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; PACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>)
; PACKED: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; PACKED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96)
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32)
; PACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32)
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; PACKED: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
; PACKED: [[COPY13:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C1]]
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; PACKED: [[COPY14:%[0-9]+]]:_(s32) = COPY [[UV1]](s32)
; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C1]]
; PACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32)
; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
; PACKED: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[DEF]](<2 x s16>)
; PACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS1]](<6 x s16>), 0
; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
; PACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[EXTRACT]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store 6 into custom "TargetCustom8", align 8)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret void

@@ -174,22 +174,20 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX6: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
; GFX6: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5
; GFX6: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8
; GFX6: [[COPY8:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11
; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0
; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1
; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2
; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX6: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]]
; GFX6: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2
; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0
; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1
; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2
; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
; GFX7-LABEL: name: s_buffer_load_v3i32
@@ -203,22 +201,20 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX7: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
; GFX7: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5
; GFX7: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8
; GFX7: [[COPY8:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11
; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0
; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1
; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2
; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX7: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]]
; GFX7: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2
; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0
; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1
; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2
; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
; GFX8-LABEL: name: s_buffer_load_v3i32
@@ -232,22 +228,20 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0, 0 :: (dereferenceable invariant load 12, align 4)
; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX8: [[COPY5:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
; GFX8: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5
; GFX8: [[COPY7:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8
; GFX8: [[COPY8:%[0-9]+]]:sgpr_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11
; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub0
; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub1
; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY5]].sub2
; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11
; GFX8: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]]
; GFX8: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2
; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0
; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1
; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2
; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec
; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]]
; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec
; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY11]]
; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec
; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]]
; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]]
; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
%val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0)
@@ -1600,15 +1594,12 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r
; GFX6: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11
; GFX6: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
; GFX6: [[COPY8:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5
; GFX6: [[COPY9:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8
; GFX6: [[COPY10:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11
; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
; GFX6: $vgpr0 = COPY [[COPY11]]
; GFX6: $vgpr1 = COPY [[COPY12]]
; GFX6: $vgpr2 = COPY [[COPY13]]
; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
; GFX6: $vgpr0 = COPY [[COPY8]]
; GFX6: $vgpr1 = COPY [[COPY9]]
; GFX6: $vgpr2 = COPY [[COPY10]]
; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset
; GFX7: bb.1 (%ir-block.0):
@@ -1626,15 +1617,12 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r
; GFX7: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11
; GFX7: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
; GFX7: [[COPY8:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5
; GFX7: [[COPY9:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8
; GFX7: [[COPY10:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11
; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
; GFX7: $vgpr0 = COPY [[COPY11]]
; GFX7: $vgpr1 = COPY [[COPY12]]
; GFX7: $vgpr2 = COPY [[COPY13]]
; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
; GFX7: $vgpr0 = COPY [[COPY8]]
; GFX7: $vgpr1 = COPY [[COPY9]]
; GFX7: $vgpr2 = COPY [[COPY10]]
; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset
; GFX8: bb.1 (%ir-block.0):
@@ -1652,15 +1640,12 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r
; GFX8: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]]
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11
; GFX8: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2
; GFX8: [[COPY8:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub3_sub4_sub5
; GFX8: [[COPY9:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub6_sub7_sub8
; GFX8: [[COPY10:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub9_sub10_sub11
; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
; GFX8: $vgpr0 = COPY [[COPY11]]
; GFX8: $vgpr1 = COPY [[COPY12]]
; GFX8: $vgpr2 = COPY [[COPY13]]
; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0
; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1
; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2
; GFX8: $vgpr0 = COPY [[COPY8]]
; GFX8: $vgpr1 = COPY [[COPY9]]
; GFX8: $vgpr2 = COPY [[COPY10]]
; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
%val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0)
ret <3 x float> %val

@@ -37,8 +37,9 @@ body: |
; GCN-LABEL: name: select_from_same_results_of_unmerge_values
; GCN: liveins: $vgpr0
; GCN: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
; GCN: $vgpr0 = COPY [[UV]](s32)
; GCN: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[DEF]](<2 x s32>)
; GCN: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
; GCN: $vgpr0 = COPY [[TRUNC]](s32)
; GCN: SI_RETURN_TO_EPILOG $vgpr0
%0:_(<2 x s32>) = G_IMPLICIT_DEF
%1:_(s32) = COPY $vgpr0