mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
[AMDGPU] Tweak getTypeLegalizationCost()
Even though wide vectors are legal, they still cost more because we will eventually have to split them. Not all operations can be done uniformly on vector types. Conservatively add the cost of splitting down to at least 8 dwords, which is our widest possible load. We are more or less lying to the cost model with this change, but it can prevent the vectorizer from creating wide vectors, which result in register allocation (RA) problems for us. Differential Revision: https://reviews.llvm.org/D83078
This commit is contained in:
parent
463547bcbf
commit
95821df464
@ -11690,3 +11690,18 @@ bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
|
||||
SmallPtrSet<const Value *, 16> Visited;
|
||||
return hasCFUser(V, Visited, Subtarget->getWavefrontSize());
|
||||
}
|
||||
|
||||
/// Return the type legalization cost for \p Ty under data layout \p DL.
///
/// The result starts out as whatever TargetLoweringBase reports. When the
/// type is wider than 256 bits, the cost component (the pair's first member)
/// is replaced by the number of 256-bit pieces needed to cover the type,
/// rounded up. The MVT component is always passed through untouched.
std::pair<int, MVT>
SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
                                          Type *Ty) const {
  auto LegalizationCost = TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
  auto SizeInBits = DL.getTypeSizeInBits(Ty);

  // The widest load or store covers 8 dwords (256 bits) on the scalar side
  // and 4 dwords on the vector ALU, so anything beyond 8 dwords is treated
  // as expensive even when the type itself is legal.
  if (SizeInBits > 256)
    LegalizationCost.first = (SizeInBits + 255) / 256;

  return LegalizationCost;
}
|
||||
|
@ -464,6 +464,9 @@ public:
|
||||
MachineFunction &MF,
|
||||
const SIRegisterInfo &TRI,
|
||||
SIMachineFunctionInfo &Info) const;
|
||||
|
||||
std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
|
||||
Type *Ty) const;
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
@ -90,7 +90,7 @@ define amdgpu_kernel void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> add
|
||||
}
|
||||
|
||||
; ALL: 'add_v16i64'
|
||||
; ALL: estimated cost of 32 for {{.*}} add <16 x i64>
|
||||
; ALL: estimated cost of 128 for {{.*}} add <16 x i64>
|
||||
define amdgpu_kernel void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %vaddr, <16 x i64> %b) #0 {
|
||||
%vec = load <16 x i64>, <16 x i64> addrspace(1)* %vaddr
|
||||
%add = add <16 x i64> %vec, %b
|
||||
|
@ -90,7 +90,7 @@ define amdgpu_kernel void @mul_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> add
|
||||
|
||||
|
||||
; ALL: 'mul_v8i64'
|
||||
; ALL: estimated cost of 128 for {{.*}} mul <8 x i64>
|
||||
; ALL: estimated cost of 256 for {{.*}} mul <8 x i64>
|
||||
define amdgpu_kernel void @mul_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(1)* %vaddr, <8 x i64> %b) #0 {
|
||||
%vec = load <8 x i64>, <8 x i64> addrspace(1)* %vaddr
|
||||
%mul = mul <8 x i64> %vec, %b
|
||||
|
Loading…
Reference in New Issue
Block a user