mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
ae65e281f3
to reflect the new license. We understand that people may be surprised that we're moving the header entirely to discuss the new license. We checked this carefully with the Foundation's lawyer and we believe this is the correct approach. Essentially, all code in the project is now made available by the LLVM project under our new license, so you will see that the license headers include that license only. Some of our contributors have contributed code under our old license, and accordingly, we have retained a copy of our old license notice in the top-level files in each project and repository. llvm-svn: 351636
71 lines
2.5 KiB
C++
71 lines
2.5 KiB
C++
//===- llvm/Support/Unicode.h - Unicode character properties -*- C++ -*-=====//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines functions that allow querying certain properties of Unicode
|
|
// characters.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_SUPPORT_UNICODE_H
|
|
#define LLVM_SUPPORT_UNICODE_H
|
|
|
|
namespace llvm {
|
|
class StringRef;
|
|
|
|
namespace sys {
|
|
namespace unicode {
|
|
|
|
enum ColumnWidthErrors {
|
|
ErrorInvalidUTF8 = -2,
|
|
ErrorNonPrintableCharacter = -1
|
|
};
|
|
|
|
/// Determines if a character is likely to be displayed correctly on the
|
|
/// terminal. Exact implementation would have to depend on the specific
|
|
/// terminal, so we define the semantic that should be suitable for generic case
|
|
/// of a terminal capable to output Unicode characters.
|
|
///
|
|
/// All characters from the Unicode code point range are considered printable
|
|
/// except for:
|
|
/// * C0 and C1 control character ranges;
|
|
/// * default ignorable code points as per 5.21 of
|
|
/// http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf
|
|
/// except for U+00AD SOFT HYPHEN, as it's actually displayed on most
|
|
/// terminals;
|
|
/// * format characters (category = Cf);
|
|
/// * surrogates (category = Cs);
|
|
/// * unassigned characters (category = Cn).
|
|
/// \return true if the character is considered printable.
|
|
bool isPrintable(int UCS);
|
|
|
|
/// Gets the number of positions the UTF8-encoded \p Text is likely to occupy
|
|
/// when output on a terminal ("character width"). This depends on the
|
|
/// implementation of the terminal, and there's no standard definition of
|
|
/// character width.
|
|
///
|
|
/// The implementation defines it in a way that is expected to be compatible
|
|
/// with a generic Unicode-capable terminal.
|
|
///
|
|
/// \return Character width:
|
|
/// * ErrorNonPrintableCharacter (-1) if \p Text contains non-printable
|
|
/// characters (as identified by isPrintable);
|
|
/// * 0 for each non-spacing and enclosing combining mark;
|
|
/// * 2 for each CJK character excluding halfwidth forms;
|
|
/// * 1 for each of the remaining characters.
|
|
int columnWidthUTF8(StringRef Text);
|
|
|
|
/// Fold input unicode character according the Simple unicode case folding
|
|
/// rules.
|
|
int foldCharSimple(int C);
|
|
|
|
} // namespace unicode
|
|
} // namespace sys
|
|
} // namespace llvm
|
|
|
|
#endif
|