diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index a3135a78763..d9027284850 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11690,3 +11690,18 @@ bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
   SmallPtrSet<const Value *, 16> Visited;
   return hasCFUser(V, Visited, Subtarget->getWavefrontSize());
 }
+
+std::pair<int, MVT>
+SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
+                                          Type *Ty) const {
+  auto Cost = TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
+  auto Size = DL.getTypeSizeInBits(Ty);
+  // Maximum load or store can handle 8 dwords (256 bits) for scalar and 4
+  // for vector ALU. Assume anything above 8 dwords is expensive even if
+  // legal, and charge one unit per 256-bit chunk, rounded up.
+  if (Size <= 256)
+    return Cost;
+
+  Cost.first = (Size + 255) / 256;
+  return Cost;
+}
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index ffe9140d3d0..f4c07646405 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -464,6 +464,12 @@ public:
                                  MachineFunction &MF,
                                  const SIRegisterInfo &TRI,
                                  SIMachineFunctionInfo &Info) const;
+
+  /// Type legalization cost for \p Ty. Types of at most 256 bits get the
+  /// TargetLoweringBase cost unchanged; wider types keep the base MVT but
+  /// have the cost (.first) replaced by ceil(size-in-bits / 256).
+  std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
+                                              Type *Ty) const;
 };
 
 } // End namespace llvm
diff --git a/test/Analysis/CostModel/AMDGPU/add-sub.ll b/test/Analysis/CostModel/AMDGPU/add-sub.ll
index 9a2c01058b2..609769fd514 100644
--- a/test/Analysis/CostModel/AMDGPU/add-sub.ll
+++ b/test/Analysis/CostModel/AMDGPU/add-sub.ll
@@ -90,7 +90,7 @@ define amdgpu_kernel void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> add
 }
 
 ; ALL: 'add_v16i64'
-; ALL: estimated cost of 32 for {{.*}} add <16 x i64>
+; ALL: estimated cost of 128 for {{.*}} add <16 x i64>
 define amdgpu_kernel void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %vaddr, <16 x i64> %b) #0 {
   %vec = load <16 x i64>, <16 x i64> addrspace(1)* %vaddr
   %add = add <16 x i64> %vec, %b
diff --git a/test/Analysis/CostModel/AMDGPU/mul.ll b/test/Analysis/CostModel/AMDGPU/mul.ll
index 4d8a66ecd42..fa36d391f9c 100644
--- a/test/Analysis/CostModel/AMDGPU/mul.ll
+++ b/test/Analysis/CostModel/AMDGPU/mul.ll
@@ -90,7 +90,7 @@ define amdgpu_kernel void @mul_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> add
 
 
 ; ALL: 'mul_v8i64'
-; ALL: estimated cost of 128 for {{.*}} mul <8 x i64>
+; ALL: estimated cost of 256 for {{.*}} mul <8 x i64>
 define amdgpu_kernel void @mul_v8i64(<8 x i64> addrspace(1)* %out, <8 x i64> addrspace(1)* %vaddr, <8 x i64> %b) #0 {
   %vec = load <8 x i64>, <8 x i64> addrspace(1)* %vaddr
   %mul = mul <8 x i64> %vec, %b