//===- llvm/unittest/Support/formatted_raw_ostream_test.cpp ---------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallString.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" using namespace llvm; namespace { TEST(formatted_raw_ostreamTest, Test_Tell) { // Check offset when underlying stream has buffer contents. SmallString<128> A; raw_svector_ostream B(A); formatted_raw_ostream C(B); char tmp[100] = ""; for (unsigned i = 0; i != 3; ++i) { C.write(tmp, 100); EXPECT_EQ(100*(i+1), (unsigned) C.tell()); } } TEST(formatted_raw_ostreamTest, Test_LineColumn) { // Test tracking of line and column numbers in a stream. SmallString<128> A; raw_svector_ostream B(A); formatted_raw_ostream C(B); EXPECT_EQ(0U, C.getLine()); EXPECT_EQ(0U, C.getColumn()); C << "a"; EXPECT_EQ(0U, C.getLine()); EXPECT_EQ(1U, C.getColumn()); C << "bcdef"; EXPECT_EQ(0U, C.getLine()); EXPECT_EQ(6U, C.getColumn()); // '\n' increments line number, sets column to zero. C << "\n"; EXPECT_EQ(1U, C.getLine()); EXPECT_EQ(0U, C.getColumn()); // '\r sets column to zero without changing line number C << "foo\r"; EXPECT_EQ(1U, C.getLine()); EXPECT_EQ(0U, C.getColumn()); // '\t' advances column to the next multiple of 8. // FIXME: If the column number is already a multiple of 8 this will do // nothing, is this behaviour correct? C << "1\t"; EXPECT_EQ(8U, C.getColumn()); C << "\t"; EXPECT_EQ(8U, C.getColumn()); C << "1234567\t"; EXPECT_EQ(16U, C.getColumn()); EXPECT_EQ(1U, C.getLine()); } TEST(formatted_raw_ostreamTest, Test_Flush) { // Flushing the buffer causes the characters in the buffer to be scanned // before the buffer is emptied, so line and column numbers will still be // tracked properly. SmallString<128> A; raw_svector_ostream B(A); B.SetBufferSize(32); formatted_raw_ostream C(B); C << "\nabc"; EXPECT_EQ(4U, C.GetNumBytesInBuffer()); C.flush(); EXPECT_EQ(1U, C.getLine()); EXPECT_EQ(3U, C.getColumn()); EXPECT_EQ(0U, C.GetNumBytesInBuffer()); } TEST(formatted_raw_ostreamTest, Test_UTF8) { SmallString<128> A; raw_svector_ostream B(A); B.SetBufferSize(32); formatted_raw_ostream C(B); // U+00A0 Non-breaking space: encoded as two bytes, but only one column wide. C << u8"\u00a0"; EXPECT_EQ(0U, C.getLine()); EXPECT_EQ(1U, C.getColumn()); EXPECT_EQ(2U, C.GetNumBytesInBuffer()); // U+2468 CIRCLED DIGIT NINE: encoded as three bytes, but only one column // wide. C << u8"\u2468"; EXPECT_EQ(0U, C.getLine()); EXPECT_EQ(2U, C.getColumn()); EXPECT_EQ(5U, C.GetNumBytesInBuffer()); // U+00010000 LINEAR B SYLLABLE B008 A: encoded as four bytes, but only one // column wide. C << u8"\U00010000"; EXPECT_EQ(0U, C.getLine()); EXPECT_EQ(3U, C.getColumn()); EXPECT_EQ(9U, C.GetNumBytesInBuffer()); // U+55B5, CJK character, encodes as three bytes, takes up two columns. C << u8"\u55b5"; EXPECT_EQ(0U, C.getLine()); EXPECT_EQ(5U, C.getColumn()); EXPECT_EQ(12U, C.GetNumBytesInBuffer()); // U+200B, zero-width space, encoded as three bytes but has no effect on the // column or line number. C << u8"\u200b"; EXPECT_EQ(0U, C.getLine()); EXPECT_EQ(5U, C.getColumn()); EXPECT_EQ(15U, C.GetNumBytesInBuffer()); } TEST(formatted_raw_ostreamTest, Test_UTF8Buffered) { SmallString<128> A; raw_svector_ostream B(A); B.SetBufferSize(4); formatted_raw_ostream C(B); // U+2468 encodes as three bytes, so will cause the buffer to be flushed after // the first byte (4 byte buffer, 3 bytes already written). We need to save // the first part of the UTF-8 encoding until after the buffer is cleared and // the remaining two bytes are written, at which point we can check the // display width. In this case the display width is 1, so we end at column 4, // with 6 bytes written into total, 2 of which are in the buffer. C << u8"123\u2468"; EXPECT_EQ(0U, C.getLine()); EXPECT_EQ(4U, C.getColumn()); EXPECT_EQ(2U, C.GetNumBytesInBuffer()); C.flush(); EXPECT_EQ(6U, A.size()); // Same as above, but with a CJK character which displays as two columns. C << u8"123\u55b5"; EXPECT_EQ(0U, C.getLine()); EXPECT_EQ(9U, C.getColumn()); EXPECT_EQ(2U, C.GetNumBytesInBuffer()); C.flush(); EXPECT_EQ(12U, A.size()); } TEST(formatted_raw_ostreamTest, Test_UTF8TinyBuffer) { SmallString<128> A; raw_svector_ostream B(A); B.SetBufferSize(1); formatted_raw_ostream C(B); // The stream has a one-byte buffer, so it gets flushed multiple times while // printing a single Unicode character. C << u8"\u2468"; EXPECT_EQ(0U, C.getLine()); EXPECT_EQ(1U, C.getColumn()); EXPECT_EQ(0U, C.GetNumBytesInBuffer()); C.flush(); EXPECT_EQ(3U, A.size()); } }