1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00
dfukalov efaecfc60e [AMDGPU][CostModel] Refine cost model for half- and quarter-rate instructions.
1. Throughput and code-size cost estimations were separated and updated.
2. Updated fdiv cost estimation for different cases.
3. Added scalarization processing for types that are treated as !isSimple() to
improve codesize estimation in getArithmeticInstrCost() and
getArithmeticInstrCost(). The code was borrowed from TCK_RecipThroughput path
of base implementation.

The next step is to unify the scalarization part in the base class, which
currently works for the TCK_RecipThroughput path only.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D89973
2020-10-24 19:53:08 +03:00

113 lines
5.0 KiB
LLVM

; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s
; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=SIZEALL,FASTF16,ALL %s
; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SIZEALL,SLOWF16,ALL %s
; Scalar f32 subtraction. All four opt invocations expect a cost of 1.
; ALL-LABEL: 'fsub_f32'
; ALL: estimated cost of 1 for {{.*}} fsub float
define amdgpu_kernel void @fsub_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
  ; Load the left operand, subtract the kernel argument %b, store to %out.
  %lhs = load float, float addrspace(1)* %vaddr
  %diff = fsub float %lhs, %b
  store float %diff, float addrspace(1)* %out
  ret void
}
; <2 x float> subtraction. All four opt invocations expect a cost of 2.
; ALL-LABEL: 'fsub_v2f32'
; ALL: estimated cost of 2 for {{.*}} fsub <2 x float>
define amdgpu_kernel void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
  ; Load the left operand, subtract the kernel argument %b, store to %out.
  %lhs = load <2 x float>, <2 x float> addrspace(1)* %vaddr
  %diff = fsub <2 x float> %lhs, %b
  store <2 x float> %diff, <2 x float> addrspace(1)* %out
  ret void
}
; <3 x float> subtraction. All four opt invocations expect a cost of 3.
; ALL-LABEL: 'fsub_v3f32'
; ALL: estimated cost of 3 for {{.*}} fsub <3 x float>
define amdgpu_kernel void @fsub_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
  ; Load the left operand, subtract the kernel argument %b, store to %out.
  %lhs = load <3 x float>, <3 x float> addrspace(1)* %vaddr
  %diff = fsub <3 x float> %lhs, %b
  store <3 x float> %diff, <3 x float> addrspace(1)* %out
  ret void
}
; <5 x float> subtraction. All four opt invocations expect a cost of 5.
; ALL-LABEL: 'fsub_v5f32'
; ALL: estimated cost of 5 for {{.*}} fsub <5 x float>
define amdgpu_kernel void @fsub_v5f32(<5 x float> addrspace(1)* %out, <5 x float> addrspace(1)* %vaddr, <5 x float> %b) #0 {
  ; Load the left operand, subtract the kernel argument %b, store to %out.
  %lhs = load <5 x float>, <5 x float> addrspace(1)* %vaddr
  %diff = fsub <5 x float> %lhs, %b
  store <5 x float> %diff, <5 x float> addrspace(1)* %out
  ret void
}
; Scalar f64 subtraction. Expected throughput cost is 2 with
; +half-rate-64-ops and 4 with -half-rate-64-ops; the code-size runs
; expect 2 regardless of that attribute.
; ALL-LABEL: 'fsub_f64'
; FASTF64: estimated cost of 2 for {{.*}} fsub double
; SLOWF64: estimated cost of 4 for {{.*}} fsub double
; SIZEALL: estimated cost of 2 for {{.*}} fsub double
define amdgpu_kernel void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
  ; Load the left operand, subtract the kernel argument %b, store to %out.
  %lhs = load double, double addrspace(1)* %vaddr
  %diff = fsub double %lhs, %b
  store double %diff, double addrspace(1)* %out
  ret void
}
; <2 x double> subtraction. Expected throughput cost is 4 with
; +half-rate-64-ops and 8 with -half-rate-64-ops; the code-size runs
; expect 4 regardless of that attribute.
; ALL-LABEL: 'fsub_v2f64'
; FASTF64: estimated cost of 4 for {{.*}} fsub <2 x double>
; SLOWF64: estimated cost of 8 for {{.*}} fsub <2 x double>
; SIZEALL: estimated cost of 4 for {{.*}} fsub <2 x double>
define amdgpu_kernel void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
  ; Load the left operand, subtract the kernel argument %b, store to %out.
  %lhs = load <2 x double>, <2 x double> addrspace(1)* %vaddr
  %diff = fsub <2 x double> %lhs, %b
  store <2 x double> %diff, <2 x double> addrspace(1)* %out
  ret void
}
; <3 x double> subtraction. Expected throughput cost is 6 with
; +half-rate-64-ops and 12 with -half-rate-64-ops; the code-size runs
; expect 6 regardless of that attribute.
; ALL-LABEL: 'fsub_v3f64'
; FASTF64: estimated cost of 6 for {{.*}} fsub <3 x double>
; SLOWF64: estimated cost of 12 for {{.*}} fsub <3 x double>
; SIZEALL: estimated cost of 6 for {{.*}} fsub <3 x double>
define amdgpu_kernel void @fsub_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
  ; Load the left operand, subtract the kernel argument %b, store to %out.
  %lhs = load <3 x double>, <3 x double> addrspace(1)* %vaddr
  %diff = fsub <3 x double> %lhs, %b
  store <3 x double> %diff, <3 x double> addrspace(1)* %out
  ret void
}
; Scalar f16 subtraction. All four opt invocations expect a cost of 1.
; ALL-LABEL: 'fsub_f16'
; ALL: estimated cost of 1 for {{.*}} fsub half
define amdgpu_kernel void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
  ; Load the left operand, subtract the kernel argument %b, store to %out.
  %lhs = load half, half addrspace(1)* %vaddr
  %diff = fsub half %lhs, %b
  store half %diff, half addrspace(1)* %out
  ret void
}
; <2 x half> subtraction. The -mcpu=gfx900 invocations expect a cost of 1;
; the invocations without -mcpu expect 2.
; ALL-LABEL: 'fsub_v2f16'
; SLOWF16: estimated cost of 2 for {{.*}} fsub <2 x half>
; FASTF16: estimated cost of 1 for {{.*}} fsub <2 x half>
define amdgpu_kernel void @fsub_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
  ; Load the left operand, subtract the kernel argument %b, store to %out.
  %lhs = load <2 x half>, <2 x half> addrspace(1)* %vaddr
  %diff = fsub <2 x half> %lhs, %b
  store <2 x half> %diff, <2 x half> addrspace(1)* %out
  ret void
}
; <3 x half> subtraction. The -mcpu=gfx900 invocations expect a cost of 2;
; the invocations without -mcpu expect 4.
; ALL-LABEL: 'fsub_v3f16'
; SLOWF16: estimated cost of 4 for {{.*}} fsub <3 x half>
; FASTF16: estimated cost of 2 for {{.*}} fsub <3 x half>
define amdgpu_kernel void @fsub_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %vaddr, <3 x half> %b) #0 {
  ; Load the left operand, subtract the kernel argument %b, store to %out.
  %lhs = load <3 x half>, <3 x half> addrspace(1)* %vaddr
  %diff = fsub <3 x half> %lhs, %b
  store <3 x half> %diff, <3 x half> addrspace(1)* %out
  ret void
}
; <4 x half> subtraction. The -mcpu=gfx900 invocations expect a cost of 2;
; the invocations without -mcpu expect 4.
; ALL-LABEL: 'fsub_v4f16'
; SLOWF16: estimated cost of 4 for {{.*}} fsub <4 x half>
; FASTF16: estimated cost of 2 for {{.*}} fsub <4 x half>
define amdgpu_kernel void @fsub_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
  ; Load the left operand, subtract the kernel argument %b, store to %out.
  %lhs = load <4 x half>, <4 x half> addrspace(1)* %vaddr
  %diff = fsub <4 x half> %lhs, %b
  store <4 x half> %diff, <4 x half> addrspace(1)* %out
  ret void
}