mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
AMDGPU/SI: Use float as the operand type for amdgcn.interp intrinsics
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, wdng, yaxunl, llvm-commits, tony-tye
Differential Revision: https://reviews.llvm.org/D26724
llvm-svn: 287962
This commit is contained in:
parent
9ca4196263
commit
f3e7f685e9
@ -479,7 +479,7 @@ def int_amdgcn_s_getreg :
|
||||
def int_amdgcn_interp_p1 :
|
||||
GCCBuiltin<"__builtin_amdgcn_interp_p1">,
|
||||
Intrinsic<[llvm_float_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>; // This intrinsic reads from lds, but the memory
|
||||
// values are constant, so it behaves like IntrNoMem.
|
||||
|
||||
@ -487,7 +487,7 @@ def int_amdgcn_interp_p1 :
|
||||
def int_amdgcn_interp_p2 :
|
||||
GCCBuiltin<"__builtin_amdgcn_interp_p2">,
|
||||
Intrinsic<[llvm_float_ty],
|
||||
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>; // See int_amdgcn_interp_p1 for why this is
|
||||
// IntrNoMem.
|
||||
|
||||
|
@ -2476,6 +2476,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
DAG.getConstant(0, DL, MVT::i32));
|
||||
SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
|
||||
DAG.getConstant(1, DL, MVT::i32));
|
||||
I = DAG.getNode(ISD::BITCAST, DL, MVT::f32, I);
|
||||
J = DAG.getNode(ISD::BITCAST, DL, MVT::f32, J);
|
||||
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
|
||||
SDValue Glue = M0.getValue(1);
|
||||
SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL,
|
||||
|
@ -52,7 +52,7 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
|
||||
(outs VGPR_32:$dst),
|
||||
(ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
|
||||
"v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]",
|
||||
[(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan),
|
||||
[(set f32:$dst, (AMDGPUinterp_p1 f32:$i, (i32 imm:$attr_chan),
|
||||
(i32 imm:$attr)))]
|
||||
>;
|
||||
|
||||
@ -75,7 +75,7 @@ defm V_INTERP_P2_F32 : VINTRP_m <
|
||||
(outs VGPR_32:$dst),
|
||||
(ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr),
|
||||
"v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [m0]",
|
||||
[(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, i32:$j, (i32 imm:$attr_chan),
|
||||
[(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, f32:$j, (i32 imm:$attr_chan),
|
||||
(i32 imm:$attr)))]>;
|
||||
|
||||
} // End DisableEncoding = "$src0", Constraints = "$src0 = $dst"
|
||||
|
@ -6,23 +6,23 @@
|
||||
;GCN: s_mov_b32 m0, s{{[0-9]+}}
|
||||
;GCN: v_interp_p1_f32
|
||||
;GCN: v_interp_p2_f32
|
||||
define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) {
|
||||
define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x float>) {
|
||||
main_body:
|
||||
%i = extractelement <2 x i32> %4, i32 0
|
||||
%j = extractelement <2 x i32> %4, i32 1
|
||||
%p0_0 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 0, i32 0, i32 %3)
|
||||
%p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, i32 %j, i32 0, i32 0, i32 %3)
|
||||
%p0_1 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 1, i32 0, i32 %3)
|
||||
%p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, i32 %j, i32 1, i32 0, i32 %3)
|
||||
%i = extractelement <2 x float> %4, i32 0
|
||||
%j = extractelement <2 x float> %4, i32 1
|
||||
%p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %3)
|
||||
%p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %3)
|
||||
%p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %3)
|
||||
%p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %3)
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %p1_1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.interp.p1(i32, i32, i32, i32) #0
|
||||
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare float @llvm.amdgcn.interp.p2(float, i32, i32, i32, i32) #0
|
||||
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user