From 53e15805872a6c3a4038f3546eaab4c2d4ed5189 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 3 Sep 2016 07:06:58 +0000 Subject: [PATCH] AMDGPU: Do basic folding of class intrinsic This allows more of the OCML builtin library to be constant folded. llvm-svn: 280586 --- .../InstCombine/InstCombineCalls.cpp | 79 ++++++ .../InstCombine/amdgcn-intrinsics.ll | 237 ++++++++++++++++++ 2 files changed, 316 insertions(+) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 00fdc46eea8..ca868f056f0 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2237,6 +2237,85 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::amdgcn_class: { + enum { + S_NAN = 1 << 0, // Signaling NaN + Q_NAN = 1 << 1, // Quiet NaN + N_INFINITY = 1 << 2, // Negative infinity + N_NORMAL = 1 << 3, // Negative normal + N_SUBNORMAL = 1 << 4, // Negative subnormal + N_ZERO = 1 << 5, // Negative zero + P_ZERO = 1 << 6, // Positive zero + P_SUBNORMAL = 1 << 7, // Positive subnormal + P_NORMAL = 1 << 8, // Positive normal + P_INFINITY = 1 << 9 // Positive infinity + }; + + const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL | + N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY; + + Value *Src0 = II->getArgOperand(0); + Value *Src1 = II->getArgOperand(1); + const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1); + if (!CMask) { + if (isa<UndefValue>(Src0)) + return replaceInstUsesWith(*II, UndefValue::get(II->getType())); + + if (isa<UndefValue>(Src1)) + return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false)); + break; + } + + uint32_t Mask = CMask->getZExtValue(); + + // If all tests are made, it doesn't matter what the value is.
+ if ((Mask & FullMask) == FullMask) + return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true)); + + if ((Mask & FullMask) == 0) + return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false)); + + if (Mask == (S_NAN | Q_NAN)) { + // Equivalent of isnan. Replace with standard fcmp. + Value *FCmp = Builder->CreateFCmpUNO(Src0, Src0); + FCmp->takeName(II); + return replaceInstUsesWith(*II, FCmp); + } + + const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0); + if (!CVal) { + if (isa<UndefValue>(Src0)) + return replaceInstUsesWith(*II, UndefValue::get(II->getType())); + + // Clamp mask to used bits + if ((Mask & FullMask) != Mask) { + CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(), + { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) } + ); + + NewCall->takeName(II); + return replaceInstUsesWith(*II, NewCall); + } + + break; + } + + const APFloat &Val = CVal->getValueAPF(); + + bool Result = + ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) || + ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) || + ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) || + ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) || + ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) || + ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) || + ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) || + ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) || + ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) || + ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative()); + + return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result)); + } case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd.
diff --git a/test/Transforms/InstCombine/amdgcn-intrinsics.ll b/test/Transforms/InstCombine/amdgcn-intrinsics.ll index a734924f170..3c38e789062 100644 --- a/test/Transforms/InstCombine/amdgcn-intrinsics.ll +++ b/test/Transforms/InstCombine/amdgcn-intrinsics.ll @@ -362,3 +362,240 @@ define i32 @test_constant_fold_frexp_exp_f64_min_num() nounwind { ret i32 %val } +; -------------------------------------------------------------------- +; llvm.amdgcn.class +; -------------------------------------------------------------------- + +declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone +declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone + +; CHECK-LABEL: @test_class_undef_mask_f32( +; CHECK: ret i1 false +define i1 @test_class_undef_mask_f32(float %x) nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 undef) + ret i1 %val +} + +; CHECK-LABEL: @test_class_over_max_mask_f32( +; CHECK: %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1) +define i1 @test_class_over_max_mask_f32(float %x) nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1025) + ret i1 %val +} + +; CHECK-LABEL: @test_class_no_mask_f32( +; CHECK: ret i1 false +define i1 @test_class_no_mask_f32(float %x) nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 0) + ret i1 %val +} + +; CHECK-LABEL: @test_class_full_mask_f32( +; CHECK: ret i1 true +define i1 @test_class_full_mask_f32(float %x) nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1023) + ret i1 %val +} + +; CHECK-LABEL: @test_class_undef_no_mask_f32( +; CHECK: ret i1 false +define i1 @test_class_undef_no_mask_f32() nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 0) + ret i1 %val +} + +; CHECK-LABEL: @test_class_undef_full_mask_f32( +; CHECK: ret i1 true +define i1 @test_class_undef_full_mask_f32() nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 1023) + ret i1 %val +} + +; CHECK-LABEL: @test_class_undef_val_f32( +; CHECK: ret i1 undef 
+define i1 @test_class_undef_val_f32() nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 4) + ret i1 %val +} + +; CHECK-LABEL: @test_class_undef_undef_f32( +; CHECK: ret i1 undef +define i1 @test_class_undef_undef_f32() nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef) + ret i1 %val +} + +; CHECK-LABEL: @test_class_var_mask_f32( +; CHECK: %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask) +define i1 @test_class_var_mask_f32(float %x, i32 %mask) nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask) + ret i1 %val +} + +; CHECK-LABEL: @test_class_isnan_f32( +; CHECK: %val = fcmp uno float %x, 0.000000e+00 +define i1 @test_class_isnan_f32(float %x) nounwind { + %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_snan_test_snan_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_snan_test_snan_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 1) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_qnan_test_qnan_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_qnan_test_qnan_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 2) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_qnan_test_snan_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_qnan_test_snan_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 1) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_ninf_test_ninf_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_ninf_test_ninf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 4) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_pinf_test_ninf_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_pinf_test_ninf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 4) + ret i1 %val +} + +; 
CHECK-LABEL: @test_constant_class_qnan_test_ninf_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_qnan_test_ninf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 4) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_snan_test_ninf_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_snan_test_ninf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 4) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_nnormal_test_nnormal_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_nnormal_test_nnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 8) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_pnormal_test_nnormal_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_pnormal_test_nnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 8) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_nsubnormal_test_nsubnormal_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_nsubnormal_test_nsubnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 16) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_psubnormal_test_nsubnormal_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_psubnormal_test_nsubnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 16) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_nzero_test_nzero_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_nzero_test_nzero_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 32) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_pzero_test_nzero_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_pzero_test_nzero_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 32) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_pzero_test_pzero_f64( +; CHECK: ret i1 true 
+define i1 @test_constant_class_pzero_test_pzero_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 64) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_nzero_test_pzero_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_nzero_test_pzero_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 64) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_psubnormal_test_psubnormal_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_psubnormal_test_psubnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 128) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_nsubnormal_test_psubnormal_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_nsubnormal_test_psubnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 128) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_pnormal_test_pnormal_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_pnormal_test_pnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 256) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_nnormal_test_pnormal_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_nnormal_test_pnormal_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 256) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_pinf_test_pinf_f64( +; CHECK: ret i1 true +define i1 @test_constant_class_pinf_test_pinf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 512) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_ninf_test_pinf_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_ninf_test_pinf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 512) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_qnan_test_pinf_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_qnan_test_pinf_f64() nounwind { + %val = 
call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 512) + ret i1 %val +} + +; CHECK-LABEL: @test_constant_class_snan_test_pinf_f64( +; CHECK: ret i1 false +define i1 @test_constant_class_snan_test_pinf_f64() nounwind { + %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 512) + ret i1 %val +}