diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index 84582e8b992..a9a7a2789a7 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -11,6 +11,10 @@ // //===----------------------------------------------------------------------===// +class AMDGPUReadPreloadRegisterIntrinsic<string name> + : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin<name>; + let TargetPrefix = "r600" in { class R600ReadPreloadRegisterIntrinsic<string name> @@ -41,15 +45,30 @@ def int_r600_rat_store_typed : Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>, GCCBuiltin<"__builtin_r600_rat_store_typed">; +def int_r600_rsq : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] +>; + +def int_r600_read_workdim : AMDGPUReadPreloadRegisterIntrinsic < + "__builtin_r600_read_workdim" +>; + } // End TargetPrefix = "r600" +// FIXME: These should be renamed/moved to r600 let TargetPrefix = "AMDGPU" in { +def int_AMDGPU_rsq_clamped : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] +>; -class AMDGPUReadPreloadRegisterIntrinsic<string name> - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, - GCCBuiltin<name>; +def int_AMDGPU_ldexp : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem] +>; +} -def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">, +let TargetPrefix = "amdgcn" in { + +def int_amdgcn_div_scale : GCCBuiltin<"__builtin_amdgcn_div_scale">, // 1st parameter: Numerator // 2nd parameter: Denominator // 3rd parameter: Constant to select between first and @@ -58,43 +77,39 @@ def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">, [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], [IntrNoMem]>; -def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">, +def int_amdgcn_div_fmas : GCCBuiltin<"__builtin_amdgcn_div_fmas">, Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty], [IntrNoMem]>; -def int_AMDGPU_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">, +def int_amdgcn_div_fixup : GCCBuiltin<"__builtin_amdgcn_div_fixup">, Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; -def int_AMDGPU_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">, +def int_amdgcn_trig_preop : GCCBuiltin<"__builtin_amdgcn_trig_preop">, Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>; -def int_AMDGPU_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">, +def int_amdgcn_rcp : GCCBuiltin<"__builtin_amdgcn_rcp">, Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; -def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">, +def int_amdgcn_rsq : GCCBuiltin<"__builtin_amdgcn_rsq">, Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; -def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">, +def int_amdgcn_rsq_clamped : GCCBuiltin<"__builtin_amdgcn_rsq_clamped">, Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; -def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">, +def int_amdgcn_ldexp : GCCBuiltin<"__builtin_amdgcn_ldexp">, Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>; -def int_AMDGPU_class : GCCBuiltin<"__builtin_amdgpu_class">, +def int_amdgcn_class : GCCBuiltin<"__builtin_amdgcn_class">, Intrinsic<[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>; -def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic < - "__builtin_amdgpu_read_workdim">; +def int_amdgcn_read_workdim : 
AMDGPUReadPreloadRegisterIntrinsic < + "__builtin_amdgcn_read_workdim">; -} // End TargetPrefix = "AMDGPU" -let TargetPrefix = "amdgcn" in { - -// SI only def int_amdgcn_buffer_wbinvl1_sc : GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">, Intrinsic<[], [], []>; diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index a211390c4b0..03d3a36b4bb 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -925,7 +925,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::CLAMP, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - case Intrinsic::AMDGPU_div_scale: { + case Intrinsic::amdgcn_div_scale: { // 3rd parameter required to be a constant. const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3)); if (!Param) @@ -947,28 +947,29 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Denominator, Numerator); } - case Intrinsic::AMDGPU_div_fmas: + case Intrinsic::amdgcn_div_fmas: return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), Op.getOperand(4)); - case Intrinsic::AMDGPU_div_fixup: + case Intrinsic::amdgcn_div_fixup: return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - case Intrinsic::AMDGPU_trig_preop: + case Intrinsic::amdgcn_trig_preop: return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT, Op.getOperand(1), Op.getOperand(2)); - case Intrinsic::AMDGPU_rcp: + case Intrinsic::amdgcn_rcp: return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1)); - case Intrinsic::AMDGPU_rsq: + case Intrinsic::amdgcn_rsq: return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); case AMDGPUIntrinsic::AMDGPU_legacy_rsq: return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1)); + case Intrinsic::amdgcn_rsq_clamped: case Intrinsic::AMDGPU_rsq_clamped: if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { Type *Type = VT.getTypeForEVT(*DAG.getContext()); @@ -984,7 +985,8 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1)); } - case Intrinsic::AMDGPU_ldexp: + case Intrinsic::amdgcn_ldexp: + case Intrinsic::AMDGPU_ldexp: // Legacy name return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1), Op.getOperand(2)); @@ -1039,7 +1041,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), Op.getOperand(2)); - case Intrinsic::AMDGPU_class: + case Intrinsic::amdgcn_class: return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT, Op.getOperand(1), Op.getOperand(2)); diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td index d8701d13b55..ae564df966a 100644 --- a/lib/Target/AMDGPU/AMDGPUIntrinsics.td +++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td @@ -61,10 +61,17 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + + // Deprecated in favor of llvm.bitreverse + def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + 
// Deprecated in favor of llvm.amdgcn.s.barrier def int_AMDGPU_barrier_local : Intrinsic<[], [], [IntrConvergent]>; - def int_AMDGPU_barrier_global : Intrinsic<[], [], [IntrConvergent]>; + def int_AMDGPU_barrier_global : Intrinsic<[], [], [IntrConvergent]>; + + // Deprecated in favor of llvm.amdgcn.read.workdim + def int_AMDGPU_read_workdim : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; } // Legacy names for compatibility. diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index d6b6e197657..6c052b810b3 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -781,7 +781,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case Intrinsic::r600_read_local_size_z: return LowerImplicitParameter(DAG, VT, DL, 8); - case Intrinsic::AMDGPU_read_workdim: { + case Intrinsic::r600_read_workdim: + case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name. uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM); return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4); } @@ -804,7 +805,12 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case Intrinsic::r600_read_tidig_z: return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, AMDGPU::T0_Z, VT); - case Intrinsic::AMDGPU_rsq: + + // FIXME: Should be renamed to r600 prefix + case Intrinsic::AMDGPU_rsq_clamped: + return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1)); + + case Intrinsic::r600_rsq: // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior. return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1)); } diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index efd8075dde4..7ba546a66dc 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1312,7 +1312,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::r600_read_local_size_z: return lowerImplicitZextParam(DAG, Op, MVT::i16, SI::KernelInputOffsets::LOCAL_SIZE_Z); - case Intrinsic::AMDGPU_read_workdim: + case Intrinsic::amdgcn_read_workdim: + case AMDGPUIntrinsic::AMDGPU_read_workdim: // Legacy name. // Really only 2 bits. 
return lowerImplicitZextParam(DAG, Op, MVT::i8, getImplicitParameterOffset(MFI, GRID_DIM)); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0f8aa5192ce..670ee6525de 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1614,7 +1614,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } - case Intrinsic::AMDGPU_rcp: { + case Intrinsic::amdgcn_rcp: { if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) { const APFloat &ArgVal = C->getValueAPF(); APFloat Val(ArgVal.getSemantics(), 1.0); diff --git a/test/CodeGen/AMDGPU/big_alu.ll b/test/CodeGen/AMDGPU/big_alu.ll index 2671c5d102b..7c98645123e 100644 --- a/test/CodeGen/AMDGPU/big_alu.ll +++ b/test/CodeGen/AMDGPU/big_alu.ll @@ -100,7 +100,7 @@ IF137: ; preds = %main_body %88 = insertelement <4 x float> %87, float %32, i32 2 %89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3 %90 = call float @llvm.AMDGPU.dp4(<4 x float> %85, <4 x float> %89) - %91 = call float @llvm.AMDGPU.rsq.f32(float %90) + %91 = call float @llvm.AMDGPU.rsq.clamped.f32(float %90) %92 = fmul float %30, %91 %93 = fmul float %31, %91 %94 = fmul float %32, %91 @@ -343,7 +343,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %325 = insertelement <4 x float> %324, float %318, i32 2 %326 = insertelement <4 x float> %325, float 0.000000e+00, i32 3 %327 = call float @llvm.AMDGPU.dp4(<4 x float> %322, <4 x float> %326) - %328 = call float @llvm.AMDGPU.rsq.f32(float %327) + %328 = call float @llvm.AMDGPU.rsq.clamped.f32(float %327) %329 = fmul float %314, %328 %330 = fmul float %316, %328 %331 = fmul float %318, %328 @@ -376,7 +376,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %358 = insertelement <4 x float> %357, float %45, i32 2 %359 = insertelement <4 x float> %358, float 0.000000e+00, i32 3 %360 = call float @llvm.AMDGPU.dp4(<4 x float> %355, <4 x float> %359) - %361 = call float @llvm.AMDGPU.rsq.f32(float %360) + %361 = call float @llvm.AMDGPU.rsq.clamped.f32(float %360) %362 = fmul float %45, %361 %363 = call float @fabs(float %362) %364 = fmul float %176, 0x3FECCCCCC0000000 @@ -402,7 +402,7 @@ ENDIF136: ; preds = %main_body, %ENDIF15 %384 = insertelement <4 x float> %383, float %45, i32 2 %385 = insertelement <4 x float> %384, float 0.000000e+00, i32 3 %386 = call float @llvm.AMDGPU.dp4(<4 x float> %381, <4 x float> %385) - %387 = call float @llvm.AMDGPU.rsq.f32(float %386) + %387 = call float @llvm.AMDGPU.rsq.clamped.f32(float %386) %388 = fmul float %45, %387 %389 = call float @fabs(float %388) %390 = fmul float %176, 0x3FF51EB860000000 @@ -1040,7 +1040,7 @@ IF179: ; preds = %ENDIF175 %896 = insertelement <4 x float> %895, float %45, i32 2 %897 = insertelement <4 x float> %896, float 0.000000e+00, i32 3 %898 = call float @llvm.AMDGPU.dp4(<4 x float> %893, <4 x float> %897) - %899 = call float @llvm.AMDGPU.rsq.f32(float %898) + %899 = call float @llvm.AMDGPU.rsq.clamped.f32(float %898) %900 = fmul float %45, %899 %901 = call float @fabs(float %900) %902 = fmul float %176, 0x3FECCCCCC0000000 @@ -1149,7 +1149,7 @@ ENDIF178: ; preds = %ENDIF175, %IF179 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq.f32(float) #1 +declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) #1 diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll 
b/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll deleted file mode 100644 index d2a655bf909..00000000000 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.f64.ll +++ /dev/null @@ -1,33 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s - -declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone -declare double @llvm.sqrt.f64(double) nounwind readnone - -; FUNC-LABEL: {{^}}rcp_f64: -; SI: v_rcp_f64_e32 -define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind { - %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone - store double %rcp, double addrspace(1)* %out, align 8 - ret void -} - -; FUNC-LABEL: {{^}}rcp_pat_f64: -; SI: v_rcp_f64_e32 -define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { - %rcp = fdiv double 1.0, %src - store double %rcp, double addrspace(1)* %out, align 8 - ret void -} - -; FUNC-LABEL: {{^}}rsq_rcp_pat_f64: -; SI-UNSAFE: v_rsq_f64_e32 -; SI-SAFE-NOT: v_rsq_f64_e32 -; SI-SAFE: v_sqrt_f64 -; SI-SAFE: v_rcp_f64 -define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind { - %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone - %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone - store double %rcp, double addrspace(1)* %out, align 8 - ret void -} diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll index eeff2536b23..622c11641aa 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.ll @@ -2,6 +2,8 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; FIXME: Uses of this should be moved to llvm.amdgcn.rsq.clamped, and +; an r600 variant added. 
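+; A minimal sketch of the intended replacement form (assuming the amdgcn
+; variant keeps the same anyfloat signature; %x and %r are placeholders):
+;   %r = call float @llvm.amdgcn.rsq.clamped.f32(float %x)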
declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll deleted file mode 100644 index 36b72f14db1..00000000000 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.ll +++ /dev/null @@ -1,33 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s - -declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone - -; FUNC-LABEL: {{^}}rsq_f32: -; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}} -; EG: RECIPSQRT_IEEE -define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind { - %rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone - store float %rsq, float addrspace(1)* %out, align 4 - ret void -} - -; TODO: Really these should be constant folded -; FUNC-LABEL: {{^}}rsq_f32_constant_4.0 -; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0 -; EG: RECIPSQRT_IEEE -define void @rsq_f32_constant_4.0(float addrspace(1)* %out) nounwind { - %rsq = call float @llvm.AMDGPU.rsq.f32(float 4.0) nounwind readnone - store float %rsq, float addrspace(1)* %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}rsq_f32_constant_100.0 -; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000 -; EG: RECIPSQRT_IEEE -define void @rsq_f32_constant_100.0(float addrspace(1)* %out) nounwind { - %rsq = call float @llvm.AMDGPU.rsq.f32(float 100.0) nounwind readnone - store float %rsq, float addrspace(1)* %out, align 4 - ret void -} diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll similarity index 85% rename from test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll rename to test/CodeGen/AMDGPU/llvm.amdgcn.class.ll index 80eb3b93f8e..37339215b0b 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -declare i1 @llvm.AMDGPU.class.f32(float, i32) #1 -declare i1 @llvm.AMDGPU.class.f64(double, i32) #1 +declare i1 @llvm.amdgcn.class.f32(float, i32) #1 +declare i1 @llvm.amdgcn.class.f64(double, i32) #1 declare i32 @llvm.r600.read.tidig.x() #1 declare float @llvm.fabs.f32(float) #1 declare double @llvm.fabs.f64(double) #1 @@ -15,7 +15,7 @@ declare double @llvm.fabs.f64(double) #1 ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { - %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 %b) #1 + %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 %b) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -31,7 +31,7 @@ define void @test_class_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { ; SI: s_endpgm define void @test_class_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { %a.fabs = call float @llvm.fabs.f32(float %a) #1 - %result = call i1 @llvm.AMDGPU.class.f32(float %a.fabs, i32 %b) #1 + %result = call i1 @llvm.amdgcn.class.f32(float %a.fabs, i32 %b) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -47,7 +47,7 @@ define void @test_class_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { ; SI: s_endpgm define void @test_class_fneg_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { %a.fneg 
= fsub float -0.0, %a - %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg, i32 %b) #1 + %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg, i32 %b) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -64,7 +64,7 @@ define void @test_class_fneg_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { define void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 { %a.fabs = call float @llvm.fabs.f32(float %a) #1 %a.fneg.fabs = fsub float -0.0, %a.fabs - %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg.fabs, i32 %b) #1 + %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg.fabs, i32 %b) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -77,7 +77,7 @@ define void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 { - %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1 + %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -90,7 +90,7 @@ define void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 { ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 { - %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1 + %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -105,7 +105,7 @@ define void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 { ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 { - %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1023) #1 + %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1023) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -119,7 +119,7 @@ define void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 { ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 { - %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1 + %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -138,7 +138,7 @@ define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace( %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in - %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1 + %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 ret void @@ -156,7 +156,7 @@ define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* % %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load i32, i32 addrspace(1)* %gep.in - %result = call i1 @llvm.AMDGPU.class.f32(float 1.0, i32 %b) #1 + %result = call i1 @llvm.amdgcn.class.f32(float 1.0, i32 %b) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 ret void @@ -176,7 +176,7 @@ define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i3 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load 
i32, i32 addrspace(1)* %gep.in - %result = call i1 @llvm.AMDGPU.class.f32(float 1024.0, i32 %b) #1 + %result = call i1 @llvm.amdgcn.class.f32(float 1024.0, i32 %b) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 ret void @@ -191,7 +191,7 @@ define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i3 ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { - %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 %b) #1 + %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 %b) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -207,7 +207,7 @@ define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { ; SI: s_endpgm define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { %a.fabs = call double @llvm.fabs.f64(double %a) #1 - %result = call i1 @llvm.AMDGPU.class.f64(double %a.fabs, i32 %b) #1 + %result = call i1 @llvm.amdgcn.class.f64(double %a.fabs, i32 %b) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -223,7 +223,7 @@ define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { ; SI: s_endpgm define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { %a.fneg = fsub double -0.0, %a - %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg, i32 %b) #1 + %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg, i32 %b) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -240,7 +240,7 @@ define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { define void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 { %a.fabs = call double @llvm.fabs.f64(double %a) #1 %a.fneg.fabs = fsub double -0.0, %a.fabs - %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg.fabs, i32 %b) #1 + %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg.fabs, i32 %b) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -250,7 +250,7 @@ define void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) ; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 1{{$}} ; SI: s_endpgm define void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 { - %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 1) #1 + %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 1) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -260,7 +260,7 @@ define void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 { ; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 64{{$}} ; SI: s_endpgm define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 { - %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 64) #1 + %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 64) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -276,7 +276,7 @@ define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 { ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 { - %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1 + %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 
4 ret void @@ -296,7 +296,7 @@ define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load double, double addrspace(1)* %in - %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1 + %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 ret void @@ -312,7 +312,7 @@ define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* % %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load i32, i32 addrspace(1)* %gep.in - %result = call i1 @llvm.AMDGPU.class.f64(double 1.0, i32 %b) #1 + %result = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 %b) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 ret void @@ -327,7 +327,7 @@ define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i3 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load i32, i32 addrspace(1)* %gep.in - %result = call i1 @llvm.AMDGPU.class.f64(double 1024.0, i32 %b) #1 + %result = call i1 @llvm.amdgcn.class.f64(double 1024.0, i32 %b) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %gep.out, align 4 ret void @@ -344,8 +344,8 @@ define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1) %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in - %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1 - %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 3) #1 + %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1 + %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 3) #1 %or = or i1 %class0, %class1 %sext = sext i1 %or to i32 @@ -364,9 +364,9 @@ define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1 %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in - %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1 - %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1 - %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1 + %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1 + %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) #1 + %class2 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1 %or.0 = or i1 %class0, %class1 %or.1 = or i1 %or.0, %class2 @@ -387,16 +387,16 @@ define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float ad %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in - %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1 - %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1 - %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1 - %class3 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1 - %class4 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 16) #1 - %class5 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 32) #1 - %class6 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1 - %class7 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 128) #1 - %class8 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 256) #1 - %class9 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 512) #1 + %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1 + %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) #1 + %class2 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1 + %class3 = call i1 
@llvm.amdgcn.class.f32(float %a, i32 8) #1 + %class4 = call i1 @llvm.amdgcn.class.f32(float %a, i32 16) #1 + %class5 = call i1 @llvm.amdgcn.class.f32(float %a, i32 32) #1 + %class6 = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1 + %class7 = call i1 @llvm.amdgcn.class.f32(float %a, i32 128) #1 + %class8 = call i1 @llvm.amdgcn.class.f32(float %a, i32 256) #1 + %class9 = call i1 @llvm.amdgcn.class.f32(float %a, i32 512) #1 %or.0 = or i1 %class0, %class1 %or.1 = or i1 %or.0, %class2 %or.2 = or i1 %or.1, %class3 @@ -422,8 +422,8 @@ define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1) %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in - %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1 - %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1 + %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1 + %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 8) #1 %or = or i1 %class0, %class1 %sext = sext i1 %or to i32 @@ -442,8 +442,8 @@ define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1) %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in - %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1 - %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1 + %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) #1 + %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) #1 %or = or i1 %class0, %class1 %sext = sext i1 %or to i32 @@ -462,8 +462,8 @@ define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in - %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1 - %class1 = call i1 @llvm.AMDGPU.class.f32(float %b, i32 8) #1 + %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1 + %class1 = call i1 @llvm.amdgcn.class.f32(float %b, i32 8) #1 %or = or i1 %class0, %class1 %sext = sext i1 %or to i32 @@ -477,7 +477,7 @@ define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 { - %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 0) #1 + %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 0) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void @@ -489,7 +489,7 @@ define void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 { ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 { - %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 0) #1 + %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 0) #1 %sext = sext i1 %result to i32 store i32 %sext, i32 addrspace(1)* %out, align 4 ret void diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll similarity index 84% rename from test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll rename to test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll index 55ca9c7536e..f9b390eca0c 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fixup.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll @@ -1,8 +1,8 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s 
-declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone -declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone +declare float @llvm.amdgcn.div.fixup.f32(float, float, float) nounwind readnone +declare double @llvm.amdgcn.div.fixup.f64(double, double, double) nounwind readnone ; GCN-LABEL: {{^}}test_div_fixup_f32: ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb @@ -17,7 +17,7 @@ declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readn ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { - %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone + %result = call float @llvm.amdgcn.div.fixup.f32(float %a, float %b, float %c) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } @@ -25,7 +25,7 @@ define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, fl ; GCN-LABEL: {{^}}test_div_fixup_f64: ; GCN: v_div_fixup_f64 define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind { - %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone + %result = call double @llvm.amdgcn.div.fixup.f64(double %a, double %b, double %c) nounwind readnone store double %result, double addrspace(1)* %out, align 8 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll similarity index 90% rename from test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll rename to test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll index 7dc094ed1b4..6bda39cf7c2 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll @@ -4,8 +4,8 @@ ; FIXME: Enable for VI. 
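; A typical call of the renamed intrinsic, as exercised by the tests below
; (%a, %b, %c and %cond are placeholders; the trailing i1 operand is
; materialized in VCC, see the *_cond_to_vcc tests):
;   %r = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond)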
declare i32 @llvm.r600.read.tidig.x() nounwind readnone -declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone -declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone +declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) nounwind readnone +declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind readnone ; GCN-LABEL: {{^}}test_div_fmas_f32: ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb @@ -21,7 +21,7 @@ declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind re ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind { - %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone + %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } @@ -35,7 +35,7 @@ define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, flo ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm define void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind { - %result = call float @llvm.AMDGPU.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) nounwind readnone + %result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } @@ -49,7 +49,7 @@ define void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, float %a, ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm define void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind { - %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) nounwind readnone + %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } @@ -63,7 +63,7 @@ define void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, ; SI: buffer_store_dword [[RESULT]], ; SI: s_endpgm define void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind { - %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) nounwind readnone + %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } @@ -71,7 +71,7 @@ define void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, float %a, ; GCN-LABEL: {{^}}test_div_fmas_f64: ; GCN: v_div_fmas_f64 define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind { - %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone + %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone store double %result, double addrspace(1)* %out, align 8 ret void } @@ -81,7 +81,7 @@ define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, ; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind { %cmp = icmp eq i32 %i, 0 - %result = call float 
@llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) nounwind readnone + %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } @@ -90,7 +90,7 @@ define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, f ; SI: s_mov_b64 vcc, 0 ; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} define void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { - %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 false) nounwind readnone + %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } @@ -99,7 +99,7 @@ define void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, f ; SI: s_mov_b64 vcc, -1 ; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} define void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind { - %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 true) nounwind readnone + %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } @@ -129,7 +129,7 @@ define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, flo %cmp1 = icmp ne i32 %d, 0 %and = and i1 %cmp0, %cmp1 - %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %and) nounwind readnone + %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %and) nounwind readnone store float %result, float addrspace(1)* %gep.out, align 4 ret void } @@ -172,7 +172,7 @@ bb: exit: %cond = phi i1 [false, %entry], [%cmp1, %bb] - %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone + %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone store float %result, float addrspace(1)* %gep.out, align 4 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll similarity index 92% rename from test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll rename to test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll index de830de039c..1b4104c3576 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_scale.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll @@ -1,8 +1,8 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.r600.read.tidig.x() nounwind readnone -declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone -declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone +declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) nounwind readnone +declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone ; SI-LABEL @test_div_scale_f32_1: @@ -19,7 +19,7 @@ define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %a = load float, float addrspace(1)* %gep.0, align 4 %b = load float, float addrspace(1)* %gep.1, align 4 - %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone + %result = call { float, 
i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -39,7 +39,7 @@ define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %a = load float, float addrspace(1)* %gep.0, align 4 %b = load float, float addrspace(1)* %gep.1, align 4 - %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -59,7 +59,7 @@ define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1) %a = load double, double addrspace(1)* %gep.0, align 8 %b = load double, double addrspace(1)* %gep.1, align 8 - %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -79,7 +79,7 @@ define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1) %a = load double, double addrspace(1)* %gep.0, align 8 %b = load double, double addrspace(1)* %gep.1, align 8 - %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -97,7 +97,7 @@ define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float add %b = load float, float addrspace(1)* %gep, align 4 - %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -115,7 +115,7 @@ define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float add %b = load float, float addrspace(1)* %gep, align 4 - %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -133,7 +133,7 @@ define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float add %a = load float, float addrspace(1)* %gep, align 4 - %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -151,7 +151,7 @@ define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float add %a = load float, float addrspace(1)* %gep, align 4 - %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone + %result = call { float, i1 } 
@llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -169,7 +169,7 @@ define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double a %b = load double, double addrspace(1)* %gep, align 8 - %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -187,7 +187,7 @@ define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double a %b = load double, double addrspace(1)* %gep, align 8 - %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -205,7 +205,7 @@ define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double a %a = load double, double addrspace(1)* %gep, align 8 - %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -223,7 +223,7 @@ define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double a %a = load double, double addrspace(1)* %gep, align 8 - %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -237,7 +237,7 @@ define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double a ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, float %a, float %b) nounwind { - %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -251,7 +251,7 @@ define void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, float %a, ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, float %a, float %b) nounwind { - %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -266,7 +266,7 @@ define void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, float %a, ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, double %a, double %b) 
nounwind { - %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -281,7 +281,7 @@ define void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, double % ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %a, double %b) nounwind { - %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -297,7 +297,7 @@ define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float a %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %a = load float, float addrspace(1)* %gep.0, align 4 - %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -313,7 +313,7 @@ define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float a %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %a = load float, float addrspace(1)* %gep.0, align 4 - %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -335,7 +335,7 @@ define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspa %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone - %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -357,7 +357,7 @@ define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspa %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone - %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll similarity index 54% rename from test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll rename to test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll index a59c0ce6d67..c48d52d150b 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.ldexp.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll @@ -1,6 +1,9 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs 
< %s | FileCheck -check-prefix=SI %s +declare float @llvm.amdgcn.ldexp.f32(float, i32) nounwind readnone +declare double @llvm.amdgcn.ldexp.f64(double, i32) nounwind readnone + declare float @llvm.AMDGPU.ldexp.f32(float, i32) nounwind readnone declare double @llvm.AMDGPU.ldexp.f64(double, i32) nounwind readnone @@ -8,7 +11,7 @@ declare double @llvm.AMDGPU.ldexp.f64(double, i32) nounwind readnone ; SI: v_ldexp_f32 ; SI: s_endpgm define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind { - %result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone + %result = call float @llvm.amdgcn.ldexp.f32(float %a, i32 %b) nounwind readnone store float %result, float addrspace(1)* %out, align 4 ret void } @@ -17,6 +20,24 @@ define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind ; SI: v_ldexp_f64 ; SI: s_endpgm define void @test_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind { + %result = call double @llvm.amdgcn.ldexp.f64(double %a, i32 %b) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} + +; SI-LABEL: {{^}}test_legacy_ldexp_f32: +; SI: v_ldexp_f32 +; SI: s_endpgm +define void @test_legacy_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind { + %result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: {{^}}test_legacy_ldexp_f64: +; SI: v_ldexp_f64 +; SI: s_endpgm +define void @test_legacy_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind { %result = call double @llvm.AMDGPU.ldexp.f64(double %a, i32 %b) nounwind readnone store double %result, double addrspace(1)* %out, align 8 ret void diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll similarity index 53% rename from test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll rename to test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll index edd6e9a72f1..0988e43299c 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.rcp.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll @@ -5,32 +5,26 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s ; XUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +declare float @llvm.amdgcn.rcp.f32(float) #0 +declare double @llvm.amdgcn.rcp.f64(double) #0 -declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone -declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone +declare double @llvm.sqrt.f64(double) #0 +declare float @llvm.sqrt.f32(float) #0 -declare float @llvm.sqrt.f32(float) nounwind readnone ; FUNC-LABEL: {{^}}rcp_f32: ; SI: v_rcp_f32_e32 -; EG: RECIP_IEEE -define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind { - %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone +define void @rcp_f32(float addrspace(1)* %out, float %src) #1 { + %rcp = call float @llvm.amdgcn.rcp.f32(float %src) #0 store float %rcp, float addrspace(1)* %out, align 4 ret void } -; FIXME: Evergreen only ever does unsafe fp math. 
; FUNC-LABEL: {{^}}rcp_pat_f32: ; SI-SAFE: v_rcp_f32_e32 ; XSI-SAFE-SPDENORM-NOT: v_rcp_f32_e32 - -; EG: RECIP_IEEE - -define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { +define void @rcp_pat_f32(float addrspace(1)* %out, float %src) #1 { %rcp = fdiv float 1.0, %src store float %rcp, float addrspace(1)* %out, align 4 ret void @@ -40,11 +34,40 @@ define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { ; SI-UNSAFE: v_rsq_f32_e32 ; SI-SAFE: v_sqrt_f32_e32 ; SI-SAFE: v_rcp_f32_e32 - -; EG: RECIPSQRT_IEEE -define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { - %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone - %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone +define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 { + %sqrt = call float @llvm.sqrt.f32(float %src) #0 + %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt) #0 store float %rcp, float addrspace(1)* %out, align 4 ret void } + +; FUNC-LABEL: {{^}}rcp_f64: +; SI: v_rcp_f64_e32 +define void @rcp_f64(double addrspace(1)* %out, double %src) #1 { + %rcp = call double @llvm.amdgcn.rcp.f64(double %src) #0 + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}rcp_pat_f64: +; SI: v_rcp_f64_e32 +define void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 { + %rcp = fdiv double 1.0, %src + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}rsq_rcp_pat_f64: +; SI-UNSAFE: v_rsq_f64_e32 +; SI-SAFE-NOT: v_rsq_f64_e32 +; SI-SAFE: v_sqrt_f64 +; SI-SAFE: v_rcp_f64 +define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 { + %sqrt = call double @llvm.sqrt.f64(double %src) #0 + %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt) #0 + store double %rcp, double addrspace(1)* %out, align 8 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.read.workdim.ll similarity index 60% rename from test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll rename to test/CodeGen/AMDGPU/llvm.amdgcn.read.workdim.ll index 2e299e30b8c..76a5757e4c2 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.read.workdim.ll @@ -1,23 +1,19 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s - -; FUNC-LABEL: {{^}}read_workdim: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV * [[VAL]], KC0[2].Z +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA %s +; GCN-LABEL: {{^}}read_workdim: ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] ; GCN-NOHSA: buffer_store_dword [[VVAL]] define void @read_workdim(i32 addrspace(1)* %out) { 
entry: - %0 = call i32 @llvm.AMDGPU.read.workdim() #0 + %0 = call i32 @llvm.amdgcn.read.workdim() #0 store i32 %0, i32 addrspace(1)* %out ret void } -; FUNC-LABEL: {{^}}read_workdim_known_bits: +; GCN-LABEL: {{^}}read_workdim_known_bits: ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c ; GCN-NOT: 0xff @@ -25,13 +21,26 @@ entry: ; GCN: buffer_store_dword [[VVAL]] define void @read_workdim_known_bits(i32 addrspace(1)* %out) { entry: - %dim = call i32 @llvm.AMDGPU.read.workdim() #0 + %dim = call i32 @llvm.amdgcn.read.workdim() #0 %shl = shl i32 %dim, 24 %shr = lshr i32 %shl, 24 store i32 %shr, i32 addrspace(1)* %out ret void } +; GCN-LABEL: {{^}}legacy_read_workdim: +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @legacy_read_workdim(i32 addrspace(1)* %out) { +entry: + %dim = call i32 @llvm.AMDGPU.read.workdim() #0 + store i32 %dim, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.amdgcn.read.workdim() #0 declare i32 @llvm.AMDGPU.read.workdim() #0 -attributes #0 = { readnone } +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll similarity index 87% rename from test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll rename to test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll index 67f1d22c717..be7398cbb85 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.rsq.clamped.f64.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamped.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s -declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone +declare double @llvm.amdgcn.rsq.clamped.f64(double) nounwind readnone ; FUNC-LABEL: {{^}}rsq_clamped_f64: ; SI: v_rsq_clamp_f64_e32 @@ -17,7 +17,7 @@ declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone ; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]] define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind { - %rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone + %rsq_clamped = call double @llvm.amdgcn.rsq.clamped.f64(double %src) nounwind readnone store double %rsq_clamped, double addrspace(1)* %out, align 8 ret void } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll new file mode 100644 index 00000000000..4c5489a4632 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll @@ -0,0 +1,60 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare float @llvm.amdgcn.rsq.f32(float) #0 +declare double @llvm.amdgcn.rsq.f64(double) #0 + +; FUNC-LABEL: {{^}}rsq_f32: +; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}} +define void @rsq_f32(float addrspace(1)* %out, float %src) #1 { + %rsq = call float @llvm.amdgcn.rsq.f32(float %src) #0 + store float %rsq, float addrspace(1)* %out, align 4 + ret void +} + +; TODO: Really these should be constant folded +; FUNC-LABEL: {{^}}rsq_f32_constant_4.0: +; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0 +define void
@rsq_f32_constant_4.0(float addrspace(1)* %out) #1 { + %rsq = call float @llvm.amdgcn.rsq.f32(float 4.0) #0 + store float %rsq, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}rsq_f32_constant_100.0: +; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000 +define void @rsq_f32_constant_100.0(float addrspace(1)* %out) #1 { + %rsq = call float @llvm.amdgcn.rsq.f32(float 100.0) #0 + store float %rsq, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}rsq_f64: +; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @rsq_f64(double addrspace(1)* %out, double %src) #1 { + %rsq = call double @llvm.amdgcn.rsq.f64(double %src) #0 + store double %rsq, double addrspace(1)* %out, align 8 + ret void +} + +; TODO: Really these should be constant folded
; FUNC-LABEL: {{^}}rsq_f64_constant_4.0: +; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, 4.0 +define void @rsq_f64_constant_4.0(double addrspace(1)* %out) #1 { + %rsq = call double @llvm.amdgcn.rsq.f64(double 4.0) #0 + store double %rsq, double addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}rsq_f64_constant_100.0: +; SI-DAG: s_mov_b32 s{{[0-9]+}}, 0x40590000 +; SI-DAG: s_mov_b32 s{{[0-9]+}}, 0{{$}} +; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @rsq_f64_constant_100.0(double addrspace(1)* %out) #1 { + %rsq = call double @llvm.amdgcn.rsq.f64(double 100.0) #0 + store double %rsq, double addrspace(1)* %out, align 8 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll similarity index 86% rename from test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll rename to test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll index 6b546a7e17c..7757e411553 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.trig_preop.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone +declare double @llvm.amdgcn.trig.preop.f64(double, i32) nounwind readnone ; SI-LABEL: {{^}}test_trig_preop_f64: ; SI-DAG: buffer_load_dword [[SEG:v[0-9]+]] @@ -12,7 +12,7 @@ declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { %a = load double, double addrspace(1)* %aptr, align 8 %b = load i32, i32 addrspace(1)* %bptr, align 4 - %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone + %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b) nounwind readnone store double %result, double addrspace(1)* %out, align 8 ret void } @@ -24,7 +24,7 @@ define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* ; SI: s_endpgm define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind { %a = load double, double addrspace(1)* %aptr, align 8 - %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7) nounwind readnone + store double %result, double addrspace(1)* %out, align 8 + ret void +} diff --git a/test/CodeGen/AMDGPU/llvm.r600.read.workdim.ll
b/test/CodeGen/AMDGPU/llvm.r600.read.workdim.ll new file mode 100644 index 00000000000..2f5947395c4 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.r600.read.workdim.ll @@ -0,0 +1,36 @@ +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s + +; EG-LABEL: {{^}}read_workdim: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].Z +define void @read_workdim(i32 addrspace(1)* %out) { +entry: + %dim = call i32 @llvm.r600.read.workdim() #0 + store i32 %dim, i32 addrspace(1)* %out + ret void +} + +; EG-LABEL: {{^}}read_workdim_known_bits: +define void @read_workdim_known_bits(i32 addrspace(1)* %out) { +entry: + %dim = call i32 @llvm.r600.read.workdim() #0 + %shl = shl i32 %dim, 24 + %shr = lshr i32 %shl, 24 + store i32 %shr, i32 addrspace(1)* %out + ret void +} + +; EG-LABEL: {{^}}legacy_read_workdim: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].Z +define void @legacy_read_workdim(i32 addrspace(1)* %out) { +entry: + %dim = call i32 @llvm.AMDGPU.read.workdim() #0 + store i32 %dim, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.workdim() #0 +declare i32 @llvm.AMDGPU.read.workdim() #0 + +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/pv.ll b/test/CodeGen/AMDGPU/pv.ll index 9a57dd19765..fda812af5e2 100644 --- a/test/CodeGen/AMDGPU/pv.ll +++ b/test/CodeGen/AMDGPU/pv.ll @@ -103,7 +103,7 @@ main_body: %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3 %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95) %97 = call float @fabs(float %96) - %98 = call float @llvm.AMDGPU.rsq.f32(float %97) + %98 = call float @llvm.AMDGPU.rsq.clamped.f32(float %97) %99 = fmul float %4, %98 %100 = fmul float %5, %98 %101 = fmul float %6, %98 @@ -119,10 +119,10 @@ main_body: %111 = extractelement <4 x float> %110, i32 2 %112 = fmul float %111, %10 %113 = fadd float %112, %22 - %114 = call float @llvm.AMDIL.clamp.(float %105, float 0.000000e+00, float 1.000000e+00) - %115 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00) - %116 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00) - %117 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) + %114 = call float @llvm.AMDGPU.clamp.f32(float %105, float 0.000000e+00, float 1.000000e+00) + %115 = call float @llvm.AMDGPU.clamp.f32(float %109, float 0.000000e+00, float 1.000000e+00) + %116 = call float @llvm.AMDGPU.clamp.f32(float %113, float 0.000000e+00, float 1.000000e+00) + %117 = call float @llvm.AMDGPU.clamp.f32(float %15, float 0.000000e+00, float 1.000000e+00) %118 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %119 = extractelement <4 x float> %118, i32 0 %120 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) @@ -202,9 +202,9 @@ main_body: %194 = fadd float %193, %188 %195 = fmul float %181, %174 %196 = fadd float %195, %190 - %197 = call float @llvm.AMDIL.clamp.(float %192, float 0.000000e+00, float 1.000000e+00) - %198 = call float @llvm.AMDIL.clamp.(float %194, float 0.000000e+00, float 1.000000e+00) - %199 = call float @llvm.AMDIL.clamp.(float %196, float 0.000000e+00, float 1.000000e+00) + %197 = call float @llvm.AMDGPU.clamp.f32(float %192, float 0.000000e+00, float 1.000000e+00) + %198 = call float @llvm.AMDGPU.clamp.f32(float %194, float 
0.000000e+00, float 1.000000e+00) + %199 = call float @llvm.AMDGPU.clamp.f32(float %196, float 0.000000e+00, float 1.000000e+00) %200 = insertelement <4 x float> undef, float %75, i32 0 %201 = insertelement <4 x float> %200, float %79, i32 1 %202 = insertelement <4 x float> %201, float %83, i32 2 @@ -225,10 +225,10 @@ declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 declare float @fabs(float) #2 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq.f32(float) #1 +declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone -declare float @llvm.AMDIL.clamp.(float, float, float) #1 +declare float @llvm.AMDGPU.clamp.f32(float, float, float) #1 ; Function Attrs: nounwind readonly declare float @llvm.pow.f32(float, float) #3 diff --git a/test/CodeGen/AMDGPU/rcp-pattern.ll b/test/CodeGen/AMDGPU/rcp-pattern.ll new file mode 100644 index 00000000000..b1d42206254 --- /dev/null +++ b/test/CodeGen/AMDGPU/rcp-pattern.ll @@ -0,0 +1,11 @@ +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FIXME: Evergreen only ever does unsafe fp math. +; FUNC-LABEL: {{^}}rcp_pat_f32: +; EG: RECIP_IEEE +define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind { + %rcp = fdiv float 1.0, %src + store float %rcp, float addrspace(1)* %out, align 4 + ret void +} diff --git a/test/CodeGen/AMDGPU/sgpr-copy.ll b/test/CodeGen/AMDGPU/sgpr-copy.ll index b849c4038bc..3e70a84d67e 100644 --- a/test/CodeGen/AMDGPU/sgpr-copy.ll +++ b/test/CodeGen/AMDGPU/sgpr-copy.ll @@ -71,7 +71,7 @@ main_body: %55 = fadd float %54, %53 %56 = fmul float %45, %45 %57 = fadd float %55, %56 - %58 = call float @llvm.AMDGPU.rsq.f32(float %57) + %58 = call float @llvm.amdgcn.rsq.f32(float %57) %59 = fmul float %43, %58 %60 = fmul float %44, %58 %61 = fmul float %45, %58 @@ -213,7 +213,7 @@ declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq.f32(float) #3 +declare float @llvm.amdgcn.rsq.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 diff --git a/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/test/CodeGen/AMDGPU/si-sgpr-spill.ll index d7b35fc631e..6c94bbc760c 100644 --- a/test/CodeGen/AMDGPU/si-sgpr-spill.ll +++ b/test/CodeGen/AMDGPU/si-sgpr-spill.ll @@ -215,7 +215,7 @@ main_body: %198 = fadd float %197, %196 %199 = fmul float %97, %97 %200 = fadd float %198, %199 - %201 = call float @llvm.AMDGPU.rsq.f32(float %200) + %201 = call float @llvm.amdgcn.rsq.f32(float %200) %202 = fmul float %95, %201 %203 = fmul float %96, %201 %204 = fmul float %202, %29 @@ -396,7 +396,7 @@ IF67: ; preds = %LOOP65 %355 = fadd float %354, %353 %356 = fmul float %352, %352 %357 = fadd float %355, %356 - %358 = call float @llvm.AMDGPU.rsq.f32(float %357) + %358 = call float @llvm.amdgcn.rsq.f32(float %357) %359 = fmul float %350, %358 %360 = fmul float %351, %358 %361 = fmul float %352, %358 @@ -524,7 +524,7 @@ IF67: ; preds = %LOOP65 %483 = fadd float %482, %481 %484 = fmul float %109, %109 %485 = fadd float %483, %484 - %486 = call float @llvm.AMDGPU.rsq.f32(float %485) + %486 = call float @llvm.amdgcn.rsq.f32(float %485) %487 = fmul float %107, %486 %488 = fmul float %108, %486 %489 = fmul float %109, %486 @@ -553,7 +553,7 @@ IF67: ; preds = %LOOP65 %512 = fadd float 
%511, %510 %513 = fmul float %97, %97 %514 = fadd float %512, %513 - %515 = call float @llvm.AMDGPU.rsq.f32(float %514) + %515 = call float @llvm.amdgcn.rsq.f32(float %514) %516 = fmul float %95, %515 %517 = fmul float %96, %515 %518 = fmul float %97, %515 @@ -670,7 +670,7 @@ declare i32 @llvm.SI.tid() #2 declare float @ceil(float) #3 ; Function Attrs: readnone -declare float @llvm.AMDGPU.rsq.f32(float) #2 +declare float @llvm.amdgcn.rsq.f32(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1 @@ -899,7 +899,7 @@ main_body: %212 = fadd float %211, %210 %213 = fmul float %209, %209 %214 = fadd float %212, %213 - %215 = call float @llvm.AMDGPU.rsq.f32(float %214) + %215 = call float @llvm.amdgcn.rsq.f32(float %214) %216 = fmul float %205, %215 %217 = fmul float %207, %215 %218 = fmul float %209, %215 @@ -1135,7 +1135,7 @@ IF189: ; preds = %LOOP %434 = fsub float -0.000000e+00, %433 %435 = fadd float 0x3FF00068E0000000, %434 %436 = call float @llvm.AMDIL.clamp.(float %435, float 0.000000e+00, float 1.000000e+00) - %437 = call float @llvm.AMDGPU.rsq.f32(float %436) + %437 = call float @llvm.amdgcn.rsq.f32(float %436) %438 = fmul float %437, %436 %439 = fsub float -0.000000e+00, %436 %440 = call float @llvm.AMDGPU.cndlt(float %439, float %438, float 0.000000e+00) @@ -1159,7 +1159,7 @@ IF189: ; preds = %LOOP %458 = fadd float %457, %456 %459 = fmul float %455, %455 %460 = fadd float %458, %459 - %461 = call float @llvm.AMDGPU.rsq.f32(float %460) + %461 = call float @llvm.amdgcn.rsq.f32(float %460) %462 = fmul float %451, %461 %463 = fmul float %453, %461 %464 = fmul float %455, %461 @@ -1269,7 +1269,7 @@ ENDIF197: ; preds = %IF189, %IF198 %559 = fadd float %558, %557 %560 = fmul float %556, %556 %561 = fadd float %559, %560 - %562 = call float @llvm.AMDGPU.rsq.f32(float %561) + %562 = call float @llvm.amdgcn.rsq.f32(float %561) %563 = fmul float %562, %561 %564 = fsub float -0.000000e+00, %561 %565 = call float @llvm.AMDGPU.cndlt(float %564, float %563, float 0.000000e+00) diff --git a/test/Transforms/InstCombine/r600-intrinsics.ll b/test/Transforms/InstCombine/amdgcn-intrinsics.ll similarity index 61% rename from test/Transforms/InstCombine/r600-intrinsics.ll rename to test/Transforms/InstCombine/amdgcn-intrinsics.ll index 1db6b0d28bf..387ad31f253 100644 --- a/test/Transforms/InstCombine/r600-intrinsics.ll +++ b/test/Transforms/InstCombine/amdgcn-intrinsics.ll @@ -1,47 +1,47 @@ ; RUN: opt -instcombine -S < %s | FileCheck %s -declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone -declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone +declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone +declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone ; CHECK-LABEL: @test_constant_fold_rcp_f32_1 ; CHECK-NEXT: ret float 1.000000e+00 define float @test_constant_fold_rcp_f32_1() nounwind { - %val = call float @llvm.AMDGPU.rcp.f32(float 1.0) nounwind readnone + %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone ret float %val } ; CHECK-LABEL: @test_constant_fold_rcp_f64_1 ; CHECK-NEXT: ret double 1.000000e+00 define double @test_constant_fold_rcp_f64_1() nounwind { - %val = call double @llvm.AMDGPU.rcp.f64(double 1.0) nounwind readnone + %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone ret double %val } ; CHECK-LABEL: @test_constant_fold_rcp_f32_half ; CHECK-NEXT: ret float 2.000000e+00 define float @test_constant_fold_rcp_f32_half() nounwind { - %val = call float 
@llvm.AMDGPU.rcp.f32(float 0.5) nounwind readnone + %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone ret float %val } ; CHECK-LABEL: @test_constant_fold_rcp_f64_half ; CHECK-NEXT: ret double 2.000000e+00 define double @test_constant_fold_rcp_f64_half() nounwind { - %val = call double @llvm.AMDGPU.rcp.f64(double 0.5) nounwind readnone + %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone ret double %val } ; CHECK-LABEL: @test_constant_fold_rcp_f32_43 -; CHECK-NEXT: call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) +; CHECK-NEXT: call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) define float @test_constant_fold_rcp_f32_43() nounwind { - %val = call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) nounwind readnone + %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone ret float %val } ; CHECK-LABEL: @test_constant_fold_rcp_f64_43 -; CHECK-NEXT: call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) +; CHECK-NEXT: call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) define double @test_constant_fold_rcp_f64_43() nounwind { - %val = call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) nounwind readnone + %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone ret double %val }
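
Note on the amdgcn-intrinsics.ll expectations above: llvm.amdgcn.rcp.f32/f64 calls fold to a constant only when 1/C is exactly representable, which is why the 1.0 and 0.5 cases fold but 4.300000e+01 survives as a call. A minimal sketch of that fold follows, assuming the usual visitCallInst switch in InstCombineCalls.cpp; this is illustrative, not the verbatim upstream code:

    // Sketch: constant-fold the rcp intrinsic when the reciprocal is exact.
    case Intrinsic::amdgcn_rcp: {
      if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
        const APFloat &ArgVal = C->getValueAPF();
        APFloat Val(ArgVal.getSemantics(), 1); // 1.0 in the argument's type
        APFloat::opStatus Status =
            Val.divide(ArgVal, APFloat::rmNearestTiesToEven);
        // opOK means the division was exact (e.g. 1/0.5); 1/43.0 raises
        // opInexact, so that call is left for the backend to lower.
        if (Status == APFloat::opOK)
          return replaceInstUsesWith(*II,
                                     ConstantFP::get(II->getContext(), Val));
      }
      break;
    }

Folding only exact reciprocals keeps the transform independent of rounding mode and of the target's actual rcp precision.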