1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[ARM][AArch64] Implement __cls, __clsl and __clsll intrinsics from ACLE

Summary:
Writing support for three ACLE functions:
  unsigned int __cls(uint32_t x)
  unsigned int __clsl(unsigned long x)
  unsigned int __clsll(uint64_t x)

CLS stands for "Count number of leading sign bits".

In AArch64, these intrinsics can be translated into the 'cls'
instruction directly. In AArch32, on the other hand, this functionality
is achieved by implementing it in terms of clz (count number of leading
zeros).

Reviewers: compnerd

Reviewed By: compnerd

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D69250
This commit is contained in:
vhscampos 2019-10-17 14:10:30 +01:00
parent d57029a491
commit d767cb8c50
6 changed files with 98 additions and 0 deletions

View File

@ -33,6 +33,9 @@ def int_aarch64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
def int_aarch64_fjcvtzs : Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
// Count-leading-sign-bits intrinsics backing the ACLE __cls/__clsl/__clsll
// functions. Both return an i32 count; the plain form takes an i32 operand
// and the "64" form takes an i64 operand. IntrNoMem: no memory access.
def int_aarch64_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
//===----------------------------------------------------------------------===//
// HINT

View File

@ -843,4 +843,7 @@ def int_arm_mve_vst2q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMat
def int_arm_mve_vst4q: Intrinsic<[], [llvm_anyptr_ty, llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>, llvm_i32_ty], [IntrWriteMem]
>;
// Count-leading-sign-bits intrinsics backing the ACLE __cls/__clsl/__clsll
// functions. Both return an i32 count; the plain form takes an i32 operand
// and the "64" form takes an i64 operand. IntrNoMem: no memory access.
def int_arm_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_arm_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>;
} // end TargetPrefix

View File

@ -1527,6 +1527,8 @@ def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
(i64 1))),
(CLSXr GPR64:$Rn)>;
// Select the CLS intrinsics directly to the CLS instructions. The 64-bit
// form computes its count with CLSXr on a GPR64 operand; since the intrinsic
// returns i32, the result is read through the sub_32 sub-register.
def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;
// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit, but actually use different

View File

@ -3629,6 +3629,49 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::arm_cls: {
// Expand the 32-bit count-leading-sign-bits intrinsic into generic nodes:
//   cls(x) = ctlz((((x >>s 31) ^ x) << 1) | 1)
// Operand 1 is the intrinsic's argument (operand 0 is the intrinsic ID).
const SDValue &Operand = Op.getOperand(1);
const EVT VTy = Op.getValueType();
// x >>s 31 replicates the sign bit across the whole word.
SDValue SRA =
DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
// After the XOR, a bit is set only where x differs from its own sign bit,
// so the top bit is always clear.
SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
// Shifting left by one drops that always-clear top bit, so the sign bit
// itself is not included in the count.
SDValue SHL =
DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
// Setting bit 0 keeps the CTLZ input non-zero, capping the result at 31.
SDValue OR =
DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
return Result;
}
case Intrinsic::arm_cls64: {
// Expand the 64-bit count-leading-sign-bits intrinsic using two 32-bit
// halves of the operand:
// cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
// else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
const SDValue &Operand = Op.getOperand(1);
const EVT VTy = Op.getValueType();
// Split the operand: element 1 is used as the high half, element 0 as the
// low half.
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
DAG.getConstant(1, dl, VTy));
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
DAG.getConstant(0, dl, VTy));
SDValue Constant0 = DAG.getConstant(0, dl, VTy);
SDValue Constant1 = DAG.getConstant(1, dl, VTy);
SDValue Constant31 = DAG.getConstant(31, dl, VTy);
// CLSHi = ctlz((((hi >>s 31) ^ hi) << 1) | 1), the same expansion used for
// the 32-bit arm_cls intrinsic, applied to the high half.
SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
// CLSHi == 31 means every bit of the high half equals its sign bit, so the
// count has to continue into the low half.
SDValue CheckLo =
DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
// In that case the high half is either all zeros or all ones. For all
// zeros count leading zeros of lo directly; otherwise invert lo first so
// that its leading ones are counted as leading zeros.
SDValue HiIsZero =
DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
SDValue AdjustedLo =
DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
// Final result: 31 + clz(adjusted lo) when the high half is uniform,
// otherwise just the count from the high half.
SDValue Result =
DAG.getSelect(dl, VTy, CheckLo,
DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
return Result;
}
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

View File

@ -0,0 +1,20 @@
; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s
; Codegen test for the AArch64 count-leading-sign-bits intrinsics.
; @llvm.aarch64.cls must be directly translated into the 'cls' instruction
; CHECK-LABEL: cls
; CHECK: cls [[REG:w[0-9]+]], [[REG]]
define i32 @cls(i32 %t) {
%cls.i = call i32 @llvm.aarch64.cls(i32 %t)
ret i32 %cls.i
}
; @llvm.aarch64.cls64 takes an i64 operand, so the 'cls' instruction is
; expected to operate on an x register.
; CHECK-LABEL: cls64
; CHECK: cls [[REG:x[0-9]+]], [[REG]]
define i32 @cls64(i64 %t) {
%cls.i = call i32 @llvm.aarch64.cls64(i64 %t)
ret i32 %cls.i
}
declare i32 @llvm.aarch64.cls(i32) nounwind
declare i32 @llvm.aarch64.cls64(i64) nounwind

27
test/CodeGen/ARM/cls.ll Normal file
View File

@ -0,0 +1,27 @@
; RUN: llc -mtriple=armv5 %s -o - | FileCheck %s
; Codegen test for the ARM count-leading-sign-bits intrinsics on armv5.
; @llvm.arm.cls is expected to be expanded into an eor/orr/clz sequence that
; computes clz((((x asr 31) eor x) lsl 1) orr 1).
; CHECK: eor [[T:r[0-9]+]], [[T]], [[T]], asr #31
; CHECK-NEXT: mov [[C1:r[0-9]+]], #1
; CHECK-NEXT: orr [[T]], [[C1]], [[T]], lsl #1
; CHECK-NEXT: clz [[T]], [[T]]
define i32 @cls(i32 %t) {
%cls.i = call i32 @llvm.arm.cls(i32 %t)
ret i32 %cls.i
}
; @llvm.arm.cls64 takes an i64 operand (expected in r0/r1 by the checks
; below). The expected code counts leading zeros of the low word (inverted
; when the high word is non-zero), computes the sign-bit count of the high
; word, and adds 31 to the low-word count when the high-word count equals 31.
; CHECK: cmp r1, #0
; CHECK: mvnne [[ADJUSTEDLO:r[0-9]+]], r0
; CHECK: clz [[CLZLO:r[0-9]+]], [[ADJUSTEDLO]]
; CHECK: eor [[A:r[0-9]+]], r1, r1, asr #31
; CHECK: mov r1, #1
; CHECK: orr [[A]], r1, [[A]], lsl #1
; CHECK: clz [[CLSHI:r[0-9]+]], [[A]]
; CHECK: cmp [[CLSHI]], #31
; CHECK: addeq r0, [[CLZLO]], #31
define i32 @cls64(i64 %t) {
%cls.i = call i32 @llvm.arm.cls64(i64 %t)
ret i32 %cls.i
}
declare i32 @llvm.arm.cls(i32) nounwind
declare i32 @llvm.arm.cls64(i64) nounwind