mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[AMDGPU] Do not clause NSA instructions
To ensure correct behaviour NSA instructions should not be claused. Reviewed By: foad Differential Revision: https://reviews.llvm.org/D102211
This commit is contained in:
parent
ec3bd41065
commit
6f78a47930
@ -226,6 +226,12 @@ def FeatureNSAtoVMEMBug : SubtargetFeature<"nsa-to-vmem-bug",
|
||||
"MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero"
|
||||
>;
|
||||
|
||||
def FeatureNSAClauseBug : SubtargetFeature<"nsa-clause-bug",
|
||||
"HasNSAClauseBug",
|
||||
"true",
|
||||
"MIMG-NSA in a hard clause has unpredictable results on GFX10.1"
|
||||
>;
|
||||
|
||||
def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
|
||||
"HasFlatSegmentOffsetBug",
|
||||
"true",
|
||||
@ -1000,6 +1006,7 @@ def FeatureGroup {
|
||||
FeatureVcmpxExecWARHazard,
|
||||
FeatureLdsBranchVmemWARHazard,
|
||||
FeatureNSAtoVMEMBug,
|
||||
FeatureNSAClauseBug,
|
||||
FeatureOffset3fBug,
|
||||
FeatureFlatSegmentOffsetBug,
|
||||
FeatureNegativeUnalignedScratchOffsetBug
|
||||
|
@ -306,6 +306,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
HasVcmpxExecWARHazard(false),
|
||||
HasLdsBranchVmemWARHazard(false),
|
||||
HasNSAtoVMEMBug(false),
|
||||
HasNSAClauseBug(false),
|
||||
HasOffset3fBug(false),
|
||||
HasFlatSegmentOffsetBug(false),
|
||||
HasImageStoreD16Bug(false),
|
||||
|
@ -188,6 +188,7 @@ protected:
|
||||
bool HasVcmpxExecWARHazard;
|
||||
bool HasLdsBranchVmemWARHazard;
|
||||
bool HasNSAtoVMEMBug;
|
||||
bool HasNSAClauseBug;
|
||||
bool HasOffset3fBug;
|
||||
bool HasFlatSegmentOffsetBug;
|
||||
bool HasImageStoreD16Bug;
|
||||
@ -952,6 +953,8 @@ public:
|
||||
return HasNSAtoVMEMBug;
|
||||
}
|
||||
|
||||
bool hasNSAClauseBug() const { return HasNSAClauseBug; }
|
||||
|
||||
bool hasHardClauses() const { return getGeneration() >= GFX10; }
|
||||
|
||||
bool hasGFX90AInsts() const { return GFX90AInsts; }
|
||||
|
@ -76,10 +76,17 @@ public:
|
||||
}
|
||||
|
||||
HardClauseType getHardClauseType(const MachineInstr &MI) {
|
||||
|
||||
// On current architectures we only get a benefit from clausing loads.
|
||||
if (MI.mayLoad()) {
|
||||
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
|
||||
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
|
||||
if (ST->hasNSAClauseBug()) {
|
||||
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
|
||||
if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
|
||||
return HARDCLAUSE_ILLEGAL;
|
||||
}
|
||||
return HARDCLAUSE_VMEM;
|
||||
}
|
||||
if (SIInstrInfo::isFLAT(MI))
|
||||
return HARDCLAUSE_FLAT;
|
||||
// TODO: LDS
|
||||
|
@ -359,7 +359,6 @@ define amdgpu_ps void @cluster_image_sample(<8 x i32> inreg %src, <4 x i32> inre
|
||||
; GFX10-NEXT: v_add_f32_e32 v12, 1.0, v3
|
||||
; GFX10-NEXT: v_add_f32_e32 v14, 2.0, v2
|
||||
; GFX10-NEXT: v_add_f32_e32 v15, 2.0, v3
|
||||
; GFX10-NEXT: s_clause 0x1
|
||||
; GFX10-NEXT: image_sample_d v[2:5], [v11, v12, v13, v13, v13, v13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: image_sample_d v[6:9], [v14, v15, v10, v10, v10, v10], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
|
@ -218,11 +218,8 @@ body: |
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
|
||||
; CHECK-LABEL: name: mimg_nsa
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
|
||||
; CHECK: BUNDLE implicit-def $vgpr10_vgpr11_vgpr12_vgpr13, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr10_vgpr11, implicit-def $vgpr10_vgpr11_vgpr12, implicit-def $vgpr11_vgpr12, implicit-def $vgpr11_vgpr12_vgpr13, implicit-def $vgpr12_vgpr13, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr20_vgpr21, implicit-def $vgpr20_vgpr21_vgpr22, implicit-def $vgpr21_vgpr22, implicit-def $vgpr21_vgpr22_vgpr23, implicit-def $vgpr22_vgpr23, implicit $vgpr3, implicit $vgpr8, implicit $vgpr7, implicit $vgpr5, implicit $vgpr4, implicit $vgpr6, implicit $vgpr0, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec {
|
||||
; CHECK: S_CLAUSE 1
|
||||
; CHECK: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
|
||||
; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
|
||||
; CHECK: }
|
||||
; CHECK: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
|
||||
; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
|
||||
$vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
|
||||
$vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
|
||||
...
|
||||
@ -235,12 +232,9 @@ body: |
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
|
||||
; CHECK-LABEL: name: mimg_nsa_mixed
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
|
||||
; CHECK: BUNDLE implicit-def $vgpr10_vgpr11_vgpr12_vgpr13, implicit-def $vgpr10, implicit-def $vgpr10_lo16, implicit-def $vgpr10_hi16, implicit-def $vgpr11, implicit-def $vgpr11_lo16, implicit-def $vgpr11_hi16, implicit-def $vgpr12, implicit-def $vgpr12_lo16, implicit-def $vgpr12_hi16, implicit-def $vgpr13, implicit-def $vgpr13_lo16, implicit-def $vgpr13_hi16, implicit-def $vgpr10_vgpr11, implicit-def $vgpr10_vgpr11_vgpr12, implicit-def $vgpr11_vgpr12, implicit-def $vgpr11_vgpr12_vgpr13, implicit-def $vgpr12_vgpr13, implicit-def $vgpr14, implicit-def $vgpr14_lo16, implicit-def $vgpr14_hi16, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit-def $vgpr20, implicit-def $vgpr20_lo16, implicit-def $vgpr20_hi16, implicit-def $vgpr21, implicit-def $vgpr21_lo16, implicit-def $vgpr21_hi16, implicit-def $vgpr22, implicit-def $vgpr22_lo16, implicit-def $vgpr22_hi16, implicit-def $vgpr23, implicit-def $vgpr23_lo16, implicit-def $vgpr23_hi16, implicit-def $vgpr20_vgpr21, implicit-def $vgpr20_vgpr21_vgpr22, implicit-def $vgpr21_vgpr22, implicit-def $vgpr21_vgpr22_vgpr23, implicit-def $vgpr22_vgpr23, implicit $vgpr3, implicit $vgpr8, implicit $vgpr7, implicit $vgpr5, implicit $vgpr4, implicit $vgpr6, implicit $vgpr0, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec, implicit $vgpr5_vgpr6 {
|
||||
; CHECK: S_CLAUSE 2
|
||||
; CHECK: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
|
||||
; CHECK: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
|
||||
; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
|
||||
; CHECK: }
|
||||
; CHECK: $vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
|
||||
; CHECK: $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
|
||||
; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
|
||||
$vgpr10_vgpr11_vgpr12_vgpr13 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
|
||||
$vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "ImageResource")
|
||||
$vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16)
|
||||
|
Loading…
Reference in New Issue
Block a user