mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
Speed up some Unicode rendering
Use a fast path for column width computation for ASCII characters. This is especially relevant for llvm-objdump. Before: % time ./bin/llvm-objdump -D -j .text /lib/libc.so.6 >/dev/null ./bin/llvm-objdump -D -j .text /lib/libc.so.6 > /dev/null 0.75s user 0.01s system 99% cpu 0.757 total After: % time ./bin/llvm-objdump -D -j .text /lib/libc.so.6 >/dev/null ./bin/llvm-objdump -D -j .text /lib/libc.so.6 > /dev/null 0.37s user 0.01s system 99% cpu 0.378 total Differential Revision: https://reviews.llvm.org/D92180
This commit is contained in:
parent
5d70260f30
commit
8db0e1abee
@ -339,11 +339,22 @@ static inline int charWidth(int UCS)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// Returns true when \p c is a printable ASCII character, i.e. its value is
/// strictly greater than 31 and strictly less than 127 (0x20 ' ' through
/// 0x7E '~'). Control characters, DEL and any byte outside the 7-bit range
/// yield false, whether plain `char` is signed or unsigned.
static bool isprintableascii(char c) {
  const bool IsControlOrBelow = c <= 31;
  const bool IsDeleteOrAbove = c >= 127;
  return !(IsControlOrBelow || IsDeleteOrAbove);
}
|
||||
|
||||
int columnWidthUTF8(StringRef Text) {
|
||||
unsigned ColumnWidth = 0;
|
||||
unsigned Length;
|
||||
for (size_t i = 0, e = Text.size(); i < e; i += Length) {
|
||||
Length = getNumBytesForUTF8(Text[i]);
|
||||
|
||||
// fast path for ASCII characters
|
||||
if (Length == 1) {
|
||||
if (!isprintableascii(Text[i]))
|
||||
return ErrorNonPrintableCharacter;
|
||||
ColumnWidth += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (Length <= 0 || i + Length > Text.size())
|
||||
return ErrorInvalidUTF8;
|
||||
UTF32 buf[1];
|
||||
|
@ -7,6 +7,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Support/Unicode.h"
|
||||
#include "llvm/Support/ConvertUTF.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace llvm {
|
||||
@ -23,6 +24,7 @@ TEST(Unicode, columnWidthUTF8) {
|
||||
EXPECT_EQ(6, columnWidthUTF8("abcdef"));
|
||||
|
||||
EXPECT_EQ(-1, columnWidthUTF8("\x01"));
|
||||
EXPECT_EQ(-1, columnWidthUTF8("\t"));
|
||||
EXPECT_EQ(-1, columnWidthUTF8("aaaaaaaaaa\x01"));
|
||||
EXPECT_EQ(-1, columnWidthUTF8("\342\200\213")); // 200B ZERO WIDTH SPACE
|
||||
|
||||
@ -84,6 +86,19 @@ TEST(Unicode, isPrintable) {
|
||||
EXPECT_TRUE(isPrintable(0x20000)); // CJK UNIFIED IDEOGRAPH-20000
|
||||
|
||||
EXPECT_FALSE(isPrintable(0x10FFFF)); // noncharacter
|
||||
|
||||
// test the validity of a fast path in columnWidthUTF8
|
||||
for (unsigned char c = 0; c < 128; ++c) {
|
||||
const UTF8 buf8[2] = {c, 0};
|
||||
const UTF8 *Target8 = &buf8[0];
|
||||
UTF32 buf32[1];
|
||||
UTF32 *Target32 = &buf32[0];
|
||||
auto status = ConvertUTF8toUTF32(&Target8, Target8 + 1, &Target32,
|
||||
Target32 + 1, strictConversion);
|
||||
EXPECT_TRUE(status == conversionOK);
|
||||
EXPECT_TRUE((columnWidthUTF8(reinterpret_cast<const char *>(buf8)) == 1) ==
|
||||
(bool)isPrintable(buf32[0]));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
Loading…
Reference in New Issue
Block a user