1
0
mirror of https://github.com/rwengine/openrw.git synced 2024-09-03 00:59:47 +02:00

rwengine: add per-font mapping + unicode support + tests

This commit is contained in:
Anonymous Maarten 2017-09-07 18:44:21 +02:00
parent 74a4398e69
commit 2ad9667b50
19 changed files with 1055 additions and 17 deletions

View File

@ -37,8 +37,6 @@ set(RWENGINE_SOURCES
src/data/Chase.hpp
src/data/CollisionModel.hpp
src/data/CutsceneData.hpp
src/data/GameTexts.cpp
src/data/GameTexts.hpp
src/data/InstanceData.hpp
src/data/ModelData.cpp
src/data/ModelData.hpp

View File

@ -18,11 +18,11 @@
#include <rw/forward.hpp>
#include <data/AnimGroup.hpp>
#include <data/GameTexts.hpp>
#include <data/ModelData.hpp>
#include <data/PedData.hpp>
#include <data/Weather.hpp>
#include <data/ZoneData.hpp>
#include <fonts/GameTexts.hpp>
#include <loaders/LoaderDFF.hpp>
#include <loaders/LoaderIMG.hpp>
#include <loaders/LoaderTXD.hpp>

View File

@ -9,15 +9,13 @@
#include <glm/glm.hpp>
#include <glm/gtc/quaternion.hpp>
#include <data/GameTexts.hpp>
#include <data/VehicleGenerator.hpp>
#include <engine/GameData.hpp>
#include <engine/GameInputState.hpp>
#include <engine/GameWorld.hpp>
#include <engine/ScreenText.hpp>
#include <fonts/GameTexts.hpp>
#include <objects/ObjectTypes.hpp>
#include <script/ScriptTypes.hpp>
class GameWorld;

View File

@ -8,7 +8,7 @@
#include <glm/glm.hpp>
#include <data/GameTexts.hpp>
#include <fonts/GameTexts.hpp>
enum class ScreenTextType {
/// Big text will be rendered according to the prescribed style.

View File

@ -5,10 +5,9 @@
#include <string>
#include <utility>
#include <fonts/GameTexts.hpp>
#include <platform/FileHandle.hpp>
#include "data/GameTexts.hpp"
void LoaderGXT::load(GameTexts &texts, const FileHandle &file) {
auto data = file->data;

View File

@ -11,7 +11,7 @@
#include <gl/DrawBuffer.hpp>
#include <gl/GeometryBuffer.hpp>
#include <data/GameTexts.hpp>
#include <fonts/GameTexts.hpp>
#include <render/OpenGLRenderer.hpp>
#define GAME_FONTS 3

View File

@ -4,11 +4,11 @@
#include <rw/debug.hpp>
#include <ai/AIGraphNode.hpp>
#include <data/GameTexts.hpp>
#include <data/ModelData.hpp>
#include <engine/GameData.hpp>
#include <engine/GameState.hpp>
#include <engine/GameWorld.hpp>
#include <fonts/GameTexts.hpp>
#include <objects/GameObject.hpp>
#include <objects/CharacterObject.hpp>
#include <objects/VehicleObject.hpp>

View File

@ -27,6 +27,15 @@ SET(RWLIB_SOURCES
source/data/Clump.hpp
source/data/Clump.cpp
source/fonts/FontMap.cpp
source/fonts/FontMap.hpp
source/fonts/FontMapGta3.cpp
source/fonts/FontMapGta3.hpp
source/fonts/GameTexts.cpp
source/fonts/GameTexts.hpp
source/fonts/Unicode.cpp
source/fonts/Unicode.hpp
source/loaders/LoaderIMG.hpp
source/loaders/LoaderIMG.cpp
source/loaders/RWBinaryStream.hpp

View File

@ -0,0 +1,79 @@
#include "FontMap.hpp"
#include <rw/debug.hpp>
#include <sstream>
/**
* Use output operations to create GameStrings (only allows write operations)
*/
using OGameStringStream = std::basic_ostringstream<GameStringChar>;
/**
 * Build the two lookup tables of this font from the given source tables.
 *
 * The forward table (GameStringChar -> unicode) is filled with insert(), so a
 * key that is already present keeps its first value. The reverse table
 * (unicode -> GameStringChar) is filled by assignment, so when several
 * GameStringChars map to the same unicode point the entry visited last wins.
 *
 * The GameStringChar of the question mark is remembered as the fallback for
 * unicode points the font cannot display; a space is used (and an error is
 * reported) when the font has no question mark.
 *
 * @param maps Source tables, processed in the order given.
 */
FontMap::FontMap(std::initializer_list<std::reference_wrapper<const gschar_unicode_map_t>> maps) {
    for (const gschar_unicode_map_t &source : maps) {
        m_to_unicode.insert(source.cbegin(), source.cend());
        for (auto entry = source.cbegin(); entry != source.cend(); ++entry) {
            m_from_unicode[entry->second] = entry->first;
        }
    }

    const auto fallback = m_from_unicode.find(UNICODE_QUESTION_MARK);
    if (fallback != m_from_unicode.end()) {
        m_unknown_gschar = fallback->second;
        return;
    }
    RW_ERROR("Font does not have a question mark");
    m_unknown_gschar = ' ';
}
/**
 * Translate one unicode point into the GameStringChar of this font.
 *
 * Values below 0x20 are control characters and are returned unchanged.
 * Unicode points without an entry in the reverse table yield the fallback
 * GameStringChar chosen at construction time.
 */
GameStringChar FontMap::to_GameStringChar(unicode_t u) const {
    if (u >= 0x20) {
        const auto found = m_from_unicode.find(u);
        return found != m_from_unicode.end() ? found->second : m_unknown_gschar;
    }
    /* Passthrough control characters */
    return u;
}
/**
 * Translate one GameStringChar of this font into a unicode point.
 *
 * Values below 0x20 are control characters and are returned unchanged.
 * GameStringChars without an entry in the forward table yield
 * UNICODE_REPLACEMENT_CHARACTER.
 */
unicode_t FontMap::to_unicode(GameStringChar c) const {
    if (c >= 0x20) {
        const auto found = m_to_unicode.find(c);
        if (found != m_to_unicode.end()) {
            return found->second;
        }
        return UNICODE_REPLACEMENT_CHARACTER;
    }
    /* Passthrough control characters */
    return c;
}
std::string FontMap::to_string(const GameString &s) const {
std::ostringstream oss;
for (GameStringChar c: s) {
char buffer[4];
unicode_t u = to_unicode(c);
auto nb = unicode_to_utf8(u, buffer);
oss.write(buffer, nb);
}
return oss.str();
}
/**
 * Convert a UTF-8 encoded string into a GameString for this font.
 *
 * The input is decoded one unicode point at a time and each point is mapped
 * with to_GameStringChar(), so unsupported points become the font's fallback
 * character.
 *
 * The result is built directly in a GameString. A stream of GameStringChar
 * (a plain 16-bit integer type) is avoided on purpose: the standard library
 * only guarantees stream/locale support for the real character types.
 *
 * @param utf8 The UTF-8 encoded string.
 * @return The converted GameString.
 */
GameString FontMap::to_GameString(const std::string &utf8) const {
    GameString result;
    // Each unicode point occupies at least one input byte, so this is an upper bound.
    result.reserve(utf8.size());

    std::istringstream iss(utf8);
    for (Utf8UnicodeIterator it(iss); it.good(); ++it) {
        result.push_back(to_GameStringChar(it.unicode()));
    }
    return result;
}
/**
 * Start of the GameStringChar -> unicode table (read-only iteration).
 */
FontMap::gschar_unicode_iterator FontMap::to_unicode_begin() const {
    // m_to_unicode is const inside a const member, so begin() is a const_iterator.
    return m_to_unicode.begin();
}
/**
 * One-past-the-end of the GameStringChar -> unicode table (read-only iteration).
 */
FontMap::gschar_unicode_iterator FontMap::to_unicode_end() const {
    // m_to_unicode is const inside a const member, so end() is a const_iterator.
    return m_to_unicode.end();
}

View File

@ -0,0 +1,95 @@
#ifndef _RWLIB_FONTS_FONTMAP_HPP_
#define _RWLIB_FONTS_FONTMAP_HPP_
#include <functional>
#include <initializer_list>
#include <map>
#include "GameTexts.hpp"
#include "Unicode.hpp"
/**
* @brief Class providing mapping from unicode chars to game strings and vice versa.
* The conversions of an object of this class depend on the actual font used:
* a FontMap is built from one or more source tables that describe one font.
*/
class FontMap {
public:
/**
* Mapping from GameStringChar to unicode data point.
*/
using gschar_unicode_map_t = std::map<GameStringChar, unicode_t>;
/**
* Mapping from unicode data point to GameStringChar.
*/
using unicode_gschar_map_t = std::map<unicode_t, GameStringChar>;
/**
* Iterator type over all GameStringChar to unicode.
*/
using gschar_unicode_iterator = gschar_unicode_map_t::const_iterator;
/**
* @brief FontMap Create a new font mapping using the maps provided.
* @param maps List of mappings used as source for this font mapping.
*/
FontMap(std::initializer_list<std::reference_wrapper<const gschar_unicode_map_t>> maps);
/**
* @brief to_GameStringChar Convert a unicode data point to a GameStringChar.
* @param u The unicode character.
* @return A GameStringChar
*/
GameStringChar to_GameStringChar(unicode_t u) const;
/**
* @brief to_unicode Convert a GameStringChar to a unicode data point.
* @param c The GameStringChar
* @return A unicode character.
*/
unicode_t to_unicode(GameStringChar c) const;
/**
* @brief to_string Convert a GameString to a utf-8 encoded string.
* @param s The GameString.
* @return A utf-8 encoded string.
*/
std::string to_string(const GameString &s) const;
/**
* @brief to_GameString Convert a utf-8 encoded string to a GameString.
* @param utf8 The utf-8 encoded string.
* @return A GameString.
*/
GameString to_GameString(const std::string &utf8) const;
/**
* @brief to_unicode_begin Iterate over the GameStringChar to unicode begin.
* @return Iterator to begin.
*/
gschar_unicode_iterator to_unicode_begin() const;
/**
* @brief to_unicode_end Iterate over the GameStringChar to unicode end.
* @return Iterator to end.
*/
gschar_unicode_iterator to_unicode_end() const;
private:
/**
* Mapping from a unicode point to a GameStringChar.
*/
unicode_gschar_map_t m_from_unicode;
/**
* Mapping from a GameStringChar to a unicode point.
*/
gschar_unicode_map_t m_to_unicode;
/**
* GameStringChar used if a unicode point has no corresponding GameStringChar.
*/
GameStringChar m_unknown_gschar;
};
#endif

View File

@ -0,0 +1,208 @@
#include "FontMapGta3.hpp"
/**
* GameStringChar -> unicode entries that are combined with every
* font-specific table below when the FontMap objects are built.
* Slots missing here are either supplied by a font-specific table or left
* unmapped.
*/
static const FontMap::gschar_unicode_map_t map_gta3_font_common = {
/* Punctuation */
{0x20, UNICODE_SPACE},
{0x21, UNICODE_EXCLAMATION_MARK},
{0x24, UNICODE_DOLLAR_SIGN},
{0x25, UNICODE_PROCENT_SIGN},
{0x26, UNICODE_AMPERSAND},
{0x27, UNICODE_APOSTROPHE},
{0x28, UNICODE_LEFT_PARENTHESIS},
{0x29, UNICODE_RIGHT_PARENTHESIS},
{0x2c, UNICODE_COMMA},
{0x2d, UNICODE_HYPHEN_MINUS},
{0x2e, UNICODE_FULL_STOP},
/* Digits */
{0x30, UNICODE_DIGIT_0},
{0x31, UNICODE_DIGIT_1},
{0x32, UNICODE_DIGIT_2},
{0x33, UNICODE_DIGIT_3},
{0x34, UNICODE_DIGIT_4},
{0x35, UNICODE_DIGIT_5},
{0x36, UNICODE_DIGIT_6},
{0x37, UNICODE_DIGIT_7},
{0x38, UNICODE_DIGIT_8},
{0x39, UNICODE_DIGIT_9},
{0x3a, UNICODE_COLON},
{0x3f, UNICODE_QUESTION_MARK},
/* Capital letters: same values as ASCII */
{0x41, UNICODE_CAPITAL_A},
{0x42, UNICODE_CAPITAL_B},
{0x43, UNICODE_CAPITAL_C},
{0x44, UNICODE_CAPITAL_D},
{0x45, UNICODE_CAPITAL_E},
{0x46, UNICODE_CAPITAL_F},
{0x47, UNICODE_CAPITAL_G},
{0x48, UNICODE_CAPITAL_H},
{0x49, UNICODE_CAPITAL_I},
{0x4a, UNICODE_CAPITAL_J},
{0x4b, UNICODE_CAPITAL_K},
{0x4c, UNICODE_CAPITAL_L},
{0x4d, UNICODE_CAPITAL_M},
{0x4e, UNICODE_CAPITAL_N},
{0x4f, UNICODE_CAPITAL_O},
{0x50, UNICODE_CAPITAL_P},
{0x51, UNICODE_CAPITAL_Q},
{0x52, UNICODE_CAPITAL_R},
{0x53, UNICODE_CAPITAL_S},
{0x54, UNICODE_CAPITAL_T},
{0x55, UNICODE_CAPITAL_U},
{0x56, UNICODE_CAPITAL_V},
{0x57, UNICODE_CAPITAL_W},
{0x58, UNICODE_CAPITAL_X},
{0x59, UNICODE_CAPITAL_Y},
{0x5a, UNICODE_CAPITAL_Z},
/* Small letters: same values as ASCII */
{0x61, UNICODE_SMALL_A},
{0x62, UNICODE_SMALL_B},
{0x63, UNICODE_SMALL_C},
{0x64, UNICODE_SMALL_D},
{0x65, UNICODE_SMALL_E},
{0x66, UNICODE_SMALL_F},
{0x67, UNICODE_SMALL_G},
{0x68, UNICODE_SMALL_H},
{0x69, UNICODE_SMALL_I},
{0x6a, UNICODE_SMALL_J},
{0x6b, UNICODE_SMALL_K},
{0x6c, UNICODE_SMALL_L},
{0x6d, UNICODE_SMALL_M},
{0x6e, UNICODE_SMALL_N},
{0x6f, UNICODE_SMALL_O},
{0x70, UNICODE_SMALL_P},
{0x71, UNICODE_SMALL_Q},
{0x72, UNICODE_SMALL_R},
{0x73, UNICODE_SMALL_S},
{0x74, UNICODE_SMALL_T},
{0x75, UNICODE_SMALL_U},
{0x76, UNICODE_SMALL_V},
{0x77, UNICODE_SMALL_W},
{0x78, UNICODE_SMALL_X},
{0x79, UNICODE_SMALL_Y},
{0x7a, UNICODE_SMALL_Z},
/* Accented capital letters: font slots 0x80 and up do NOT match Latin-1 */
{0x80, UNICODE_CAPITAL_A_GRAVE},
{0x81, UNICODE_CAPITAL_A_ACUTE},
{0x82, UNICODE_CAPITAL_A_CIRCUMFLEX},
{0x83, UNICODE_CAPITAL_A_DIARESIS},
{0x84, UNICODE_CAPITAL_AE},
{0x85, UNICODE_CAPITAL_C_CEDILLA},
{0x86, UNICODE_CAPITAL_E_GRAVE},
{0x87, UNICODE_CAPITAL_E_ACUTE},
{0x88, UNICODE_CAPITAL_E_CIRCUMFLEX},
{0x89, UNICODE_CAPITAL_E_DIARESIS},
{0x8a, UNICODE_CAPITAL_I_GRAVE},
{0x8b, UNICODE_CAPITAL_I_ACUTE},
{0x8c, UNICODE_CAPITAL_I_CIRCUMFLEX},
{0x8d, UNICODE_CAPITAL_I_DIARESIS},
{0x8e, UNICODE_CAPITAL_O_GRAVE},
{0x8f, UNICODE_CAPITAL_O_ACUTE},
{0x90, UNICODE_CAPITAL_O_CIRCUMFLEX},
{0x91, UNICODE_CAPITAL_O_DIARESIS},
{0x92, UNICODE_CAPITAL_U_GRAVE},
{0x93, UNICODE_CAPITAL_U_ACUTE},
{0x94, UNICODE_CAPITAL_U_CIRCUMFLEX},
{0x95, UNICODE_CAPITAL_U_DIARESIS},
/* Accented small letters */
{0x96, UNICODE_SMALL_SHARP_S},
{0x97, UNICODE_SMALL_A_GRAVE},
{0x98, UNICODE_SMALL_A_ACUTE},
{0x99, UNICODE_SMALL_A_CIRCUMFLEX},
{0x9a, UNICODE_SMALL_A_DIARESIS},
{0x9b, UNICODE_SMALL_AE},
{0x9c, UNICODE_SMALL_C_CEDILLA},
{0x9d, UNICODE_SMALL_E_GRAVE},
{0x9e, UNICODE_SMALL_E_ACUTE},
{0x9f, UNICODE_SMALL_E_CIRCUMFLEX},
{0xa0, UNICODE_SMALL_E_DIARESIS},
{0xa1, UNICODE_SMALL_I_GRAVE},
{0xa2, UNICODE_SMALL_I_ACUTE},
{0xa3, UNICODE_SMALL_I_CIRCUMFLEX},
{0xa4, UNICODE_SMALL_I_DIARESIS},
{0xa5, UNICODE_SMALL_O_GRAVE},
{0xa6, UNICODE_SMALL_O_ACUTE},
{0xa7, UNICODE_SMALL_O_CIRCUMFLEX},
{0xa8, UNICODE_SMALL_O_DIARESIS},
{0xa9, UNICODE_SMALL_U_GRAVE},
{0xaa, UNICODE_SMALL_U_ACUTE},
{0xab, UNICODE_SMALL_U_CIRCUMFLEX},
{0xac, UNICODE_SMALL_U_DIARESIS},
/* Spanish extras */
{0xad, UNICODE_CAPITAL_N_TILDE},
{0xae, UNICODE_SMALL_N_TILDE},
{0xaf, UNICODE_INVERTED_QUESTION_MARK},
{0xb0, UNICODE_INVERTED_EXCLAMATION_MARK}
};
/**
* Entries specific to font 0, combined with the common table when
* map_gta3_font0 is built. Commented-out lines are slots for which no
* unicode point is assigned in this font.
*/
static const FontMap::gschar_unicode_map_t map_gta3_font_0_priv = {
{0x22, UNICODE_QUOTATION_MARK},
{0x23, UNICODE_NUMBER_SIGN},
/* NOTE(review): the common table already maps 0x2d to HYPHEN_MINUS; as this
* table is listed after the common one, converting '-' for font 0 appears to
* yield 0x2a rather than 0x2d - confirm this is intended. */
{0x2a, UNICODE_HYPHEN_MINUS},
/*{0x2b, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x2f, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x3b, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x3c, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x3d, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x3e, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x40, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x5b, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x5c, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x5d, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x5e, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x5f, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x60, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x7b, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x7c, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x7d, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x7f, UNICODE_REPLACEMENT_CHARACTER},*/
};
/**
* Entries specific to font 1, combined with the common table when
* map_gta3_font1 is built. Several slots hold symbols (arrows, star, shield,
* heart) instead of the ASCII punctuation that shares their value.
* Commented-out lines are slots for which no unicode point is assigned.
*/
static const FontMap::gschar_unicode_map_t map_gta3_font_1_priv = {
{0x22, UNICODE_INCREMENT},
{0x23, UNICODE_REGISTERED_SIGN},
/*{0x2a, UNICODE_REPLACEMENT_CHARACTER},*/
{0x2b, UNICODE_PLUS_SIGN},
{0x2f, UNICODE_MULTIPLICATION_SIGN},
{0x3b, UNICODE_BLACK_UP_POINTING_TRIANGLE},
{0x3c, UNICODE_BLACK_LEFT_POINTING_POINTER},
{0x3d, UNICODE_BLACK_DOWN_POINTING_POINTER},
{0x3e, UNICODE_BLACK_RIGHT_POINTING_POINTER},
{0x40, UNICODE_TRADE_MARK},
{0x5b, UNICODE_SHIELD},
{0x5c, UNICODE_REVERSE_SOLIDUS},
{0x5d, UNICODE_BLACK_STAR},
{0x5e, UNICODE_NUMERO_SIGN},
{0x5f, UNICODE_DEGREES},
{0x60, UNICODE_COPYRIGHT_SIGN},
{0x7b, UNICODE_BLACK_HEART_SUIT},
{0x7c, UNICODE_WHITE_CIRCLE},
/*{0x7d, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x7f, UNICODE_REPLACEMENT_CHARACTER},*/
};
/**
* Entries specific to font 2, combined with the common table when
* map_gta3_font2 is built. Most punctuation slots match ASCII here; a few
* hold symbols instead (0x22, 0x3c, 0x3e, 0x40, 0x5f, 0x7c).
* Commented-out lines are slots for which no unicode point is assigned.
*/
static const FontMap::gschar_unicode_map_t map_gta3_font_2_priv = {
{0x22, UNICODE_INCREMENT},
{0x23, UNICODE_NUMBER_SIGN},
{0x2a, UNICODE_ASTERISK},
{0x2b, UNICODE_PLUS_SIGN},
{0x2f, UNICODE_SOLIDUS},
{0x3b, UNICODE_SEMICOLON},
{0x3c, UNICODE_BLACK_LEFT_POINTING_POINTER},
{0x3d, UNICODE_EQUALS_SIGN},
{0x3e, UNICODE_BLACK_RIGHT_POINTING_POINTER},
{0x40, UNICODE_TRADE_MARK},
{0x5b, UNICODE_LEFT_SQUARE_BRACKET},
{0x5c, UNICODE_REVERSE_SOLIDUS},
{0x5d, UNICODE_RIGHT_SQUARE_BRACKET},
{0x5e, UNICODE_CIRCUMFLEX_ACCENT},
{0x5f, UNICODE_DEGREES},
{0x60, UNICODE_GRAVE_ACCENT},
/*{0x7b, UNICODE_REPLACEMENT_CHARACTER},*/
{0x7c, UNICODE_WHITE_CIRCLE},
/*{0x7d, UNICODE_REPLACEMENT_CHARACTER},*/
/*{0x7f, UNICODE_REPLACEMENT_CHARACTER},*/
/* The only font-specific slot above the common accented range. */
{0xb1, UNICODE_ACUTE_ACCENT},
};
const FontMap map_gta3_font0({map_gta3_font_common, map_gta3_font_0_priv});
const FontMap map_gta3_font1({map_gta3_font_common, map_gta3_font_1_priv});
const FontMap map_gta3_font2({map_gta3_font_common, map_gta3_font_2_priv});
const std::array<FontMap, 3> maps_gta3_font = {
FontMap({map_gta3_font_common, map_gta3_font_0_priv}),
FontMap({map_gta3_font_common, map_gta3_font_1_priv}),
FontMap({map_gta3_font_common, map_gta3_font_2_priv}),
};

View File

@ -0,0 +1,28 @@
#ifndef _RWLIB_FONTS_FONTMAPGTA3_HPP_
#define _RWLIB_FONTS_FONTMAPGTA3_HPP_
#include "FontMap.hpp"
#include <array>
/**
* Font mapping of font style 0: Pager
*/
extern const FontMap map_gta3_font0;
/**
* Font mapping of font style 1: Pricedown
*/
extern const FontMap map_gta3_font1;
/**
* Font mapping of font style 2: Arial
*/
extern const FontMap map_gta3_font2;
/**
* Array of all font mappings.
*/
extern const std::array<FontMap, 3> maps_gta3_font;
#endif

View File

@ -1,5 +1,6 @@
#ifndef _RWENGINE_GAMETEXTS_HPP_
#define _RWENGINE_GAMETEXTS_HPP_
#ifndef _RWLIB_FONTS_GAMETEXTS_HPP_
#define _RWLIB_FONTS_GAMETEXTS_HPP_
#include <cstdint>
#include <string>
#include <unordered_map>
@ -7,12 +8,14 @@
/**
* Each GXT char is just a 16-bit index into the font map.
*/
using GameStringChar = uint16_t;
using GameStringChar = std::uint16_t;
/**
* The game stores strings as 16-bit indexes into the font
* texture, which is something similar to ASCII.
*/
using GameString = std::basic_string<GameStringChar>;
/**
* GXT keys are just 8 single byte chars.
* Keys are small so should be subject to SSO
@ -32,8 +35,7 @@ GameString fromString(const std::string& str);
/**
* Since the encoding of symbols is arbitrary, these constants should be used in
* hard-coded strings containing symbols outside of the ASCII-subset supported
* by
* all fonts
* by all fonts
*/
namespace GameSymbols {
static constexpr GameStringChar Money = '$';

View File

@ -0,0 +1,110 @@
#include "Unicode.hpp"
#include <istream>
/**
 * Encode one unicode point as UTF-8.
 *
 * Values that are not valid unicode scalar values - anything above 0x10ffff
 * and the UTF-16 surrogate range 0xd800..0xdfff - cannot be represented in
 * well-formed UTF-8 and are encoded as UNICODE_REPLACEMENT_CHARACTER instead.
 *
 * @param unicode The unicode point to encode.
 * @param c       Output buffer of at least 4 bytes; NOT zero terminated.
 * @return Number of bytes written to c (1 to 4).
 */
size_t unicode_to_utf8(unicode_t unicode, char c[4]) {
    const bool is_surrogate = unicode >= 0xd800 && unicode <= 0xdfff;
    if (is_surrogate || unicode > 0x10ffff) {
        unicode = UNICODE_REPLACEMENT_CHARACTER;
    }

    if (unicode < 0x80) {  // 7 bits
        c[0] = static_cast<char>(unicode);
        return 1;
    }
    if (unicode < 0x800) {  // 11 bits
        c[0] = static_cast<char>(0xc0 | (unicode >> 6));
        c[1] = static_cast<char>(0x80 | (unicode & 0x3f));
        return 2;
    }
    if (unicode < 0x10000) {  // 16 bits
        c[0] = static_cast<char>(0xe0 | (unicode >> 12));
        c[1] = static_cast<char>(0x80 | ((unicode >> 6) & 0x3f));
        c[2] = static_cast<char>(0x80 | (unicode & 0x3f));
        return 3;
    }
    // 21 bits; the range check above guarantees unicode <= 0x10ffff here.
    c[0] = static_cast<char>(0xf0 | (unicode >> 18));
    c[1] = static_cast<char>(0x80 | ((unicode >> 12) & 0x3f));
    c[2] = static_cast<char>(0x80 | ((unicode >> 6) & 0x3f));
    c[3] = static_cast<char>(0x80 | (unicode & 0x3f));
    return 4;
}
/**
 * Create an iterator that is not attached to any stream.
 * It is never good; it only serves as an end marker. The current unicode
 * point is zero-initialised so that no member holds an indeterminate value.
 */
Utf8UnicodeIterator::Utf8UnicodeIterator()
    : m_is(nullptr), m_finished(true), m_unicode(0) {
}
/**
 * Create an iterator over the given UTF-8 stream and decode the first
 * unicode point right away. The current unicode point is zero-initialised
 * first, so it holds a defined value even when the stream is empty and
 * nothing can be decoded.
 */
Utf8UnicodeIterator::Utf8UnicodeIterator(std::istream &is)
    : m_is(&is), m_finished(false), m_unicode(0) {
    next_unicode();
}
void Utf8UnicodeIterator::next_unicode() {
int c = m_is->get();
if (c == EOF) {
m_finished = true;
return;
}
char cc = static_cast<char>(c);
unicode_t unicode;
unsigned nb_bytes;
if ((cc & 0x80) == 0x00) {
unicode = cc;
nb_bytes = 0;
} else if ((c & 0xe0) == 0xc0) {
unicode = c & 0x1f;
nb_bytes = 1;
} else if ((c & 0xf0) == 0xe0) {
unicode = c & 0x0f;
nb_bytes = 2;
} else if ((c & 0xf8) == 0xf0) {
unicode = c & 0x07;
nb_bytes = 3;
} else {
unicode = UNICODE_REPLACEMENT_CHARACTER;
nb_bytes = 0;
}
while (nb_bytes != 0) {
c = m_is->get();
if (c == EOF) {
unicode = UNICODE_REPLACEMENT_CHARACTER;
m_finished = true;
break;
}
cc = static_cast<char>(c);
if ((c & 0xc0) != 0x80) {
unicode = UNICODE_REPLACEMENT_CHARACTER;
break;
}
unicode = (unicode << 6) | (c & 0x3f);
--nb_bytes;
}
m_unicode = unicode;
}
/**
 * Pre-increment: decode the following unicode point and return this iterator.
 */
Utf8UnicodeIterator &Utf8UnicodeIterator::operator ++() {
    this->next_unicode();
    return *this;
}
/**
 * The unicode point decoded most recently.
 */
unicode_t Utf8UnicodeIterator::unicode() const {
    return this->m_unicode;
}
/**
 * Dereference: the unicode point decoded most recently.
 */
unicode_t Utf8UnicodeIterator::operator *() const {
    const unicode_t current = m_unicode;
    return current;
}
/**
 * True as long as the iterator has not been marked finished.
 */
bool Utf8UnicodeIterator::good() const {
    return m_finished == false;
}
/**
 * Wrap a UTF-8 string; the string is copied into the internal stream.
 */
Utf8UnicodeIteratorWrapper::Utf8UnicodeIteratorWrapper(const std::string &s) : iss(s) {}
/**
 * Iterator that reads unicode points from the wrapped stream.
 */
Utf8UnicodeIterator Utf8UnicodeIteratorWrapper::begin() {
    Utf8UnicodeIterator first(iss);
    return first;
}
/**
 * End marker: a detached iterator that is never good.
 */
Utf8UnicodeIterator Utf8UnicodeIteratorWrapper::end() {
    Utf8UnicodeIterator sentinel;
    return sentinel;
}
/**
 * Loop-termination test for range-based for loops.
 * The argument is ignored: the result only says whether this iterator is
 * still good.
 */
bool Utf8UnicodeIterator::operator !=(const Utf8UnicodeIterator &) {
    return !m_finished;
}

View File

@ -0,0 +1,303 @@
#ifndef _RWLIB_FONTS_UNICODE_HPP_
#define _RWLIB_FONTS_UNICODE_HPP_
#include <cstdint>
#include <iosfwd>
#include <sstream>
/**
* unicode_t represents a unicode data point. (UTF-32)
*/
using unicode_t = char32_t;
/**
* @brief unicode_to_utf8 Encode a unicode data point to a (non-zero terminated) utf-8 string.
* @param unicode The unicode data point to convert
* @param c buffer to write the utf-8 data to
* @return number of bytes written
*/
size_t unicode_to_utf8(unicode_t unicode, char c[4]);
/**
* @brief Iterate over a utf8 string stream. Output unicode data points.
* The iterator reads from a stream it does not own; the stream must stay
* alive while the iterator is used.
*/
class Utf8UnicodeIterator {
private:
/**
* @brief m_is Pointer to the utf8 stream to iterate over.
* Null for a default constructed iterator.
*/
std::istream *m_is;
/**
* @brief m_finished true if the stream is finished/invalid.
*/
bool m_finished;
/**
* @brief m_unicode Current unicode point.
*/
unicode_t m_unicode;
/**
* @brief next_unicode Move to the next unicode point.
*/
void next_unicode();
public:
/**
* @brief Create an empty unicode iterator. The iterator is not good.
*/
Utf8UnicodeIterator();
/**
* @brief Create a unicode iterator that iterates over a utf8 stream.
* @param is utf8 stream
*/
Utf8UnicodeIterator(std::istream &is);
/**
* @brief operator ++ Move to the next unicode character.
* Only call this function when the iterator is good.
* @return this object
*/
Utf8UnicodeIterator &operator ++();
/**
* @brief operator != Returns true if this iterator is good
* The argument is not inspected; this operator only exists so that the
* iterator can drive a range-based for loop.
* @return true if this iterator is good
*/
bool operator!=(const Utf8UnicodeIterator &);
/**
* @brief operator * Returns the current unicode point
* Only call this function when the iterator is good.
* @return unicode point
*/
unicode_t operator *() const;
/**
* @brief unicode Returns the current unicode point
* Only call this function when the iterator is good.
* @return unicode point
*/
unicode_t unicode() const;
/**
* @brief good Checks whether this stream is good.
* A stream is good when a unicode point is available.
* @return true if the stream is good, else false.
*/
bool good() const;
};
/**
* @brief This class lets you iterate over a utf8 string using a range-based for loop
*/
class Utf8UnicodeIteratorWrapper {
public:
/**
* @brief Create a new wrapper.
* @param s The string that the utf8 to Unicode iterator should iterate over
*/
Utf8UnicodeIteratorWrapper(const std::string &s);
/**
* @brief Return the iterator at the start. This one returns unicode points.
* The iterator reads from the stream held by this wrapper.
* @return Utf8UnicodeIterator which will read the unicode points.
*/
Utf8UnicodeIterator begin();
/**
* @brief Return the unicode iterator at the end. This one is not good.
* No unicode points can be read from this object.
* @return Utf8UnicodeIterator Unicode iterator that is not good.
*/
Utf8UnicodeIterator end();
private:
/**
* @brief iss This utf8 stream holds the wrapped string and returns bytes to convert to unicode.
*/
std::istringstream iss;
};
/**
* Unicode data points used by OpenRW.
* Each enumerator carries the numeric value of the unicode point it names;
* the trailing comment shows the character itself.
*/
enum UnicodeValue {
/* Control characters */
UNICODE_TAB = 0x09, /* '\t' */
UNICODE_NEW_LINE = 0x0a, /* '\n' */
UNICODE_CARRIAGE_RETURN = 0x0d, /* '\r' */
/* Basic Latin (ASCII) */
UNICODE_SPACE = 0x20, /* ' ' */
UNICODE_EXCLAMATION_MARK = 0x21, /* '!' */
UNICODE_QUOTATION_MARK = 0x22, /* '"' */
UNICODE_NUMBER_SIGN = 0x23, /* '#' */
UNICODE_DOLLAR_SIGN = 0x24, /* '$' */
UNICODE_PROCENT_SIGN = 0x25, /* '%' */
UNICODE_AMPERSAND = 0x26, /* '&' */
UNICODE_APOSTROPHE = 0x27, /* ''' */
UNICODE_LEFT_PARENTHESIS = 0x28, /* '(' */
UNICODE_RIGHT_PARENTHESIS = 0x29, /* ')' */
UNICODE_ASTERISK = 0x2a, /* '*' */
UNICODE_PLUS_SIGN = 0x2b, /* '+' */
UNICODE_COMMA = 0x2c, /* ',' */
UNICODE_HYPHEN_MINUS = 0x2d, /* '-' */
UNICODE_FULL_STOP = 0x2e, /* '.' */
UNICODE_SOLIDUS = 0x2f, /* '/' */
UNICODE_DIGIT_0 = 0x30, /* '0' */
UNICODE_DIGIT_1 = 0x31, /* '1' */
UNICODE_DIGIT_2 = 0x32, /* '2' */
UNICODE_DIGIT_3 = 0x33, /* '3' */
UNICODE_DIGIT_4 = 0x34, /* '4' */
UNICODE_DIGIT_5 = 0x35, /* '5' */
UNICODE_DIGIT_6 = 0x36, /* '6' */
UNICODE_DIGIT_7 = 0x37, /* '7' */
UNICODE_DIGIT_8 = 0x38, /* '8' */
UNICODE_DIGIT_9 = 0x39, /* '9' */
UNICODE_COLON = 0x3a, /* ':' */
UNICODE_SEMICOLON = 0x3b, /* ';' */
UNICODE_LESS_THAN_SIGN = 0x3c, /* '<' */
UNICODE_EQUALS_SIGN = 0x3d, /* '=' */
UNICODE_GREATER_THAN_SIGN = 0x3e, /* '>' */
UNICODE_QUESTION_MARK = 0x3f, /* '?' */
UNICODE_COMMERCIAL_AT = 0x40, /* '@' */
UNICODE_CAPITAL_A = 0x41, /* 'A' */
UNICODE_CAPITAL_B = 0x42, /* 'B' */
UNICODE_CAPITAL_C = 0x43, /* 'C' */
UNICODE_CAPITAL_D = 0x44, /* 'D' */
UNICODE_CAPITAL_E = 0x45, /* 'E' */
UNICODE_CAPITAL_F = 0x46, /* 'F' */
UNICODE_CAPITAL_G = 0x47, /* 'G' */
UNICODE_CAPITAL_H = 0x48, /* 'H' */
UNICODE_CAPITAL_I = 0x49, /* 'I' */
UNICODE_CAPITAL_J = 0x4a, /* 'J' */
UNICODE_CAPITAL_K = 0x4b, /* 'K' */
UNICODE_CAPITAL_L = 0x4c, /* 'L' */
UNICODE_CAPITAL_M = 0x4d, /* 'M' */
UNICODE_CAPITAL_N = 0x4e, /* 'N' */
UNICODE_CAPITAL_O = 0x4f, /* 'O' */
UNICODE_CAPITAL_P = 0x50, /* 'P' */
UNICODE_CAPITAL_Q = 0x51, /* 'Q' */
UNICODE_CAPITAL_R = 0x52, /* 'R' */
UNICODE_CAPITAL_S = 0x53, /* 'S' */
UNICODE_CAPITAL_T = 0x54, /* 'T' */
UNICODE_CAPITAL_U = 0x55, /* 'U' */
UNICODE_CAPITAL_V = 0x56, /* 'V' */
UNICODE_CAPITAL_W = 0x57, /* 'W' */
UNICODE_CAPITAL_X = 0x58, /* 'X' */
UNICODE_CAPITAL_Y = 0x59, /* 'Y' */
UNICODE_CAPITAL_Z = 0x5a, /* 'Z' */
UNICODE_LEFT_SQUARE_BRACKET = 0x5b, /* '[' */
UNICODE_REVERSE_SOLIDUS = 0x5c, /* '\' */
UNICODE_RIGHT_SQUARE_BRACKET = 0x5d, /* ']' */
UNICODE_CIRCUMFLEX_ACCENT = 0x5e, /* '^' */
UNICODE_LOW_LINE = 0x5f, /* '_' */
UNICODE_GRAVE_ACCENT = 0x60, /* '`' */
UNICODE_SMALL_A = 0x61, /* 'a' */
UNICODE_SMALL_B = 0x62, /* 'b' */
UNICODE_SMALL_C = 0x63, /* 'c' */
UNICODE_SMALL_D = 0x64, /* 'd' */
UNICODE_SMALL_E = 0x65, /* 'e' */
UNICODE_SMALL_F = 0x66, /* 'f' */
UNICODE_SMALL_G = 0x67, /* 'g' */
UNICODE_SMALL_H = 0x68, /* 'h' */
UNICODE_SMALL_I = 0x69, /* 'i' */
UNICODE_SMALL_J = 0x6a, /* 'j' */
UNICODE_SMALL_K = 0x6b, /* 'k' */
UNICODE_SMALL_L = 0x6c, /* 'l' */
UNICODE_SMALL_M = 0x6d, /* 'm' */
UNICODE_SMALL_N = 0x6e, /* 'n' */
UNICODE_SMALL_O = 0x6f, /* 'o' */
UNICODE_SMALL_P = 0x70, /* 'p' */
UNICODE_SMALL_Q = 0x71, /* 'q' */
UNICODE_SMALL_R = 0x72, /* 'r' */
UNICODE_SMALL_S = 0x73, /* 's' */
UNICODE_SMALL_T = 0x74, /* 't' */
UNICODE_SMALL_U = 0x75, /* 'u' */
UNICODE_SMALL_V = 0x76, /* 'v' */
UNICODE_SMALL_W = 0x77, /* 'w' */
UNICODE_SMALL_X = 0x78, /* 'x' */
UNICODE_SMALL_Y = 0x79, /* 'y' */
UNICODE_SMALL_Z = 0x7a, /* 'z' */
UNICODE_LEFT_CURLY_BRACKET = 0x7b, /* '{' */
UNICODE_VERTICAL_LINE = 0x7c, /* '|' */
UNICODE_RIGHT_CURLY_BRACKET = 0x7d, /* '}' */
UNICODE_TILDE = 0x7e, /* '~' */
/* Latin-1 Supplement */
UNICODE_INVERTED_EXCLAMATION_MARK = 0xa1, /* '¡' */
UNICODE_COPYRIGHT_SIGN = 0xa9, /* '©' */
UNICODE_REGISTERED_SIGN = 0xae, /* '®' */
UNICODE_DEGREES = 0xb0, /* '°' */
UNICODE_ACUTE_ACCENT = 0xb4, /* '´' */
UNICODE_INVERTED_QUESTION_MARK = 0xbf, /* '¿' */
UNICODE_CAPITAL_A_GRAVE = 0xc0, /* 'À' */
UNICODE_CAPITAL_A_ACUTE = 0xc1, /* 'Á' */
UNICODE_CAPITAL_A_CIRCUMFLEX = 0xc2, /* 'Â' */
UNICODE_CAPITAL_A_DIARESIS = 0xc4, /* 'Ä' */
UNICODE_CAPITAL_AE = 0xc6, /* 'Æ' */
UNICODE_CAPITAL_C_CEDILLA = 0xc7, /* 'Ç' */
UNICODE_CAPITAL_E_GRAVE = 0xc8, /* 'È' */
UNICODE_CAPITAL_E_ACUTE = 0xc9, /* 'É' */
UNICODE_CAPITAL_E_CIRCUMFLEX = 0xca, /* 'Ê' */
UNICODE_CAPITAL_E_DIARESIS = 0xcb, /* 'Ë' */
UNICODE_CAPITAL_I_GRAVE = 0xcc, /* 'Ì' */
UNICODE_CAPITAL_I_ACUTE = 0xcd, /* 'Í' */
UNICODE_CAPITAL_I_CIRCUMFLEX = 0xce, /* 'Î' */
UNICODE_CAPITAL_I_DIARESIS = 0xcf, /* 'Ï' */
UNICODE_CAPITAL_N_TILDE = 0xd1, /* 'Ñ' */
UNICODE_CAPITAL_O_GRAVE = 0xd2, /* 'Ò' */
UNICODE_CAPITAL_O_ACUTE = 0xd3, /* 'Ó' */
UNICODE_CAPITAL_O_CIRCUMFLEX = 0xd4, /* 'Ô' */
UNICODE_CAPITAL_O_DIARESIS = 0xd6, /* 'Ö' */
UNICODE_MULTIPLICATION_SIGN = 0xd7, /* '×' */
UNICODE_CAPITAL_U_GRAVE = 0xd9, /* 'Ù' */
UNICODE_CAPITAL_U_ACUTE = 0xda, /* 'Ú' */
UNICODE_CAPITAL_U_CIRCUMFLEX = 0xdb, /* 'Û' */
UNICODE_CAPITAL_U_DIARESIS = 0xdc, /* 'Ü' */
UNICODE_SMALL_SHARP_S = 0xdf, /* 'ß' */
UNICODE_SMALL_A_GRAVE = 0xe0, /* 'à' */
UNICODE_SMALL_A_ACUTE = 0xe1, /* 'á' */
UNICODE_SMALL_A_CIRCUMFLEX = 0xe2, /* 'â' */
UNICODE_SMALL_A_DIARESIS = 0xe4, /* 'ä' */
UNICODE_SMALL_AE = 0xe6, /* 'æ' */
UNICODE_SMALL_C_CEDILLA = 0xe7, /* 'ç' */
UNICODE_SMALL_E_GRAVE = 0xe8, /* 'è' */
UNICODE_SMALL_E_ACUTE = 0xe9, /* 'é' */
UNICODE_SMALL_E_CIRCUMFLEX = 0xea, /* 'ê' */
UNICODE_SMALL_E_DIARESIS = 0xeb, /* 'ë' */
UNICODE_SMALL_I_GRAVE = 0xec, /* 'ì' */
UNICODE_SMALL_I_ACUTE = 0xed, /* 'í' */
UNICODE_SMALL_I_CIRCUMFLEX = 0xee, /* 'î' */
UNICODE_SMALL_I_DIARESIS = 0xef, /* 'ï' */
UNICODE_SMALL_N_TILDE = 0xf1, /* 'ñ' */
UNICODE_SMALL_O_GRAVE = 0xf2, /* 'ò' */
UNICODE_SMALL_O_ACUTE = 0xf3, /* 'ó' */
UNICODE_SMALL_O_CIRCUMFLEX = 0xf4, /* 'ô' */
UNICODE_SMALL_O_DIARESIS = 0xf6, /* 'ö' */
UNICODE_SMALL_U_GRAVE = 0xf9, /* 'ù' */
UNICODE_SMALL_U_ACUTE = 0xfa, /* 'ú' */
UNICODE_SMALL_U_CIRCUMFLEX = 0xfb, /* 'û' */
UNICODE_SMALL_U_DIARESIS = 0xfc, /* 'ü' */
/* Symbols beyond Latin-1 */
UNICODE_NUMERO_SIGN = 0x2116, /* '№' */
UNICODE_TRADE_MARK = 0x2122, /* '™' */
UNICODE_INCREMENT = 0x2206, /* '∆' */
UNICODE_BLACK_UP_POINTING_TRIANGLE = 0x25b2, /* '▲' */
UNICODE_BLACK_RIGHT_POINTING_POINTER = 0x25ba, /* '►' */
UNICODE_BLACK_DOWN_POINTING_POINTER = 0x25bc, /* '▼' */
UNICODE_BLACK_LEFT_POINTING_POINTER = 0x25c4, /* '◄' */
UNICODE_WHITE_CIRCLE = 0x25cb, /* '○' */
UNICODE_BLACK_STAR = 0x2605, /* '★' */
UNICODE_BLACK_HEART_SUIT = 0x2665, /* '♥' */
UNICODE_CROSS_MARK = 0x274c, /* '❌' */
UNICODE_REPLACEMENT_CHARACTER = 0xfffd, /* U+FFFD, shown in place of undecodable input */
/* Outside the Basic Multilingual Plane: needs 4 bytes in UTF-8 */
UNICODE_SHIELD = 0x1f6e1, /* '🛡' */
};
#endif

View File

@ -31,6 +31,7 @@ set(TESTS
SaveGame
ScriptMachine
State
StringEncoding
Text
TrafficDirector
Vehicle

View File

@ -0,0 +1,200 @@
#include <boost/test/unit_test.hpp>
#include <fonts/FontMapGta3.hpp>
#include <fonts/Unicode.hpp>
#include <iostream>
#include <vector>
BOOST_TEST_DONT_PRINT_LOG_VALUE(GameString)
/**
* All tests about changing string encodings.
*/
BOOST_AUTO_TEST_SUITE(StringEncodingTests)
/* A unicode point below 0x80 is encoded as a single byte. */
BOOST_AUTO_TEST_CASE(unicode_to_char_1char) {
    const unicode_t codepoint = 0x3f; /* QUESTION MARK */
    const char expected[1] = {0x3f};
    char encoded[4];
    const auto length = unicode_to_utf8(codepoint, encoded);
    BOOST_CHECK_EQUAL(length, 1);
    BOOST_CHECK_EQUAL_COLLECTIONS(encoded, encoded + length, expected, expected + length);
}
/* A unicode point below 0x800 is encoded as two bytes. */
BOOST_AUTO_TEST_CASE(unicode_to_char_2char) {
    const unicode_t codepoint = 0x00e6; /* LATIN SMALL LETTER AE */
    const char expected[2] = {char(0xc3), char(0xa6)};
    char encoded[4];
    const auto length = unicode_to_utf8(codepoint, encoded);
    BOOST_CHECK_EQUAL(length, 2);
    BOOST_CHECK_EQUAL_COLLECTIONS(encoded, encoded + length, expected, expected + length);
}
/* A unicode point below 0x10000 is encoded as three bytes. */
BOOST_AUTO_TEST_CASE(unicode_to_char_3char) {
    const unicode_t codepoint = 0x0f45; /* TIBETAN LETTER CA */
    const char expected[3] = {char(0xe0), char(0xbd), char(0x85)};
    char encoded[4];
    const auto length = unicode_to_utf8(codepoint, encoded);
    BOOST_CHECK_EQUAL(length, 3);
    BOOST_CHECK_EQUAL_COLLECTIONS(encoded, encoded + length, expected, expected + length);
}
/* A unicode point from 0x10000 up to 0x10ffff is encoded as four bytes. */
BOOST_AUTO_TEST_CASE(unicode_to_char_4char) {
    const unicode_t codepoint = 0x10454; /* SHAVIAN LETTER THIGH */
    const char expected[4] = {char(0xf0), char(0x90), char(0x91), char(0x94)};
    char encoded[4];
    const auto length = unicode_to_utf8(codepoint, encoded);
    BOOST_CHECK_EQUAL(length, 4);
    BOOST_CHECK_EQUAL_COLLECTIONS(encoded, encoded + length, expected, expected + length);
}
/* A value above 0x10ffff is encoded as the replacement character (3 bytes). */
BOOST_AUTO_TEST_CASE(unicode_to_char_illegal) {
    const unicode_t codepoint = 0x124544; /* Illegal unicode */
    // utf-8 encoding of replacement character
    const char expected[3] = {char(0xef), char(0xbf), char(0xbd)};
    char encoded[4];
    const auto length = unicode_to_utf8(codepoint, encoded);
    BOOST_CHECK_EQUAL(length, 3);
    BOOST_CHECK_EQUAL_COLLECTIONS(encoded, encoded + length, expected, expected + length);
}
/* Plain ASCII input (including an embedded NUL) decodes byte for byte. */
BOOST_AUTO_TEST_CASE(utf8_iterator_simple) {
    // The explicit length of 12 makes the terminating NUL part of the string.
    std::string text("Hello World", 12);
    BOOST_CHECK_EQUAL(text.size(), 12);

    std::istringstream stream(text);
    Utf8UnicodeIterator it(stream);
    for (char &expected : text) {
        BOOST_CHECK(it.good());
        BOOST_CHECK_EQUAL(it.unicode(), expected);
        ++it;
    }
    BOOST_CHECK(!it.good());
}
/* Bytes that cannot start a utf-8 sequence decode to the replacement character. */
BOOST_AUTO_TEST_CASE(utf8_iterator_invalid) {
    const unsigned char raw[] = {'a', 0xff, 'b', 0xff, 'c', 0x00};
    BOOST_CHECK_EQUAL(sizeof(raw), 6);

    // The stream is built from a C string, so the trailing 0x00 is not part of it.
    std::istringstream stream(reinterpret_cast<const char *>(raw));
    Utf8UnicodeIterator it(stream);

    const unicode_t expected[] = {
        'a', UNICODE_REPLACEMENT_CHARACTER,
        'b', UNICODE_REPLACEMENT_CHARACTER,
        'c',
    };
    for (const unicode_t want : expected) {
        BOOST_CHECK(it.good());
        BOOST_CHECK_EQUAL(want, it.unicode());
        ++it;
    }
    BOOST_CHECK(!it.good());
}
typedef struct {
const char *utf8;
unicode_t unicode;
} utf8_unicode_t;
/* Test vectors covering 1-, 2-, 3- and 4-byte utf-8 sequences. */
/* The table ends with a sentinel entry whose unicode value is 0. */
const utf8_unicode_t utf_unicode_data[] = {
{
"\x2e", 0x2e, /* full stop*/
}, {
"\x77", 0x77, /* w */
}, {
"\xc3\x97", 0xd7, /* multiplication sign */
}, {
"\xd8\x8c", 0x060c, /* Arabic comma */
}, {
"\xe2\x9b\xb0", 0x26f0, /* mountain */
}, {
"\xe2\x8a\xa8", 0x22a8, /* true */
}, {
"\xf0\x9f\xa7\x9b", 0x1f9db, /* vampire */
}, {
"\xf0\x9f\xa4\x9f", 0x1f91f, /* I love you hand sign */
}, {
"", 0, /* sentinel */
}
};
std::string createUtf8String() {
std::ostringstream oss;
for (const utf8_unicode_t *uu = utf_unicode_data; uu->unicode; ++uu) {
oss << uu->utf8;
}
return oss.str();
}
/*
 * Decoding the concatenated test vectors must yield exactly their unicode
 * points, in order, and nothing after the last one.
 */
BOOST_AUTO_TEST_CASE(utf8_iterator_mixed) {
    const std::string str = createUtf8String();
    std::istringstream iss(str);
    Utf8UnicodeIterator it(iss);
    for (const utf8_unicode_t *uu = utf_unicode_data; uu->unicode; ++uu) {
        BOOST_CHECK(it.good());
        BOOST_CHECK_EQUAL(it.unicode(), uu->unicode);
        ++it;
    }
    // All vectors consumed: the iterator must have reached the end of input.
    BOOST_CHECK(!it.good());
}
/*
 * Utf8UnicodeIteratorWrapper must drive a range-based for loop that visits
 * every test vector exactly once, in order.
 */
BOOST_AUTO_TEST_CASE(utf8_iterator_ranged_for_loop) {
    const std::string str = createUtf8String();
    // Number of real test vectors: everything except the trailing sentinel.
    const size_t expected_count =
        sizeof(utf_unicode_data) / sizeof(utf_unicode_data[0]) - 1;

    size_t nb = 0;
    for (unicode_t u : Utf8UnicodeIteratorWrapper(str)) {
        // Stop instead of reading past the table if the loop yields too much.
        BOOST_REQUIRE(nb < expected_count);
        BOOST_CHECK_EQUAL(u, utf_unicode_data[nb].unicode);
        ++nb;
    }
    // The loop itself (not a second, unrelated iterator) must have seen all vectors.
    BOOST_CHECK_EQUAL(nb, expected_count);
}
/* In every font, 'x' maps to itself in both directions. */
BOOST_AUTO_TEST_CASE(GameStringChar_simple) {
    for (const FontMap &fontmap : maps_gta3_font) {
        const GameStringChar encoded = fontmap.to_GameStringChar('x');
        BOOST_CHECK_EQUAL(encoded, GameStringChar('x'));

        const unicode_t decoded = fontmap.to_unicode('x');
        BOOST_CHECK_EQUAL(decoded, unicode_t('x'));
    }
}
/* In every font, a plain ASCII sentence converts character by character. */
BOOST_AUTO_TEST_CASE(GameString_simple) {
    std::string text = "Hello world";
    for (const FontMap &fontmap : maps_gta3_font) {
        auto converted = fontmap.to_GameString(text);
        BOOST_CHECK_EQUAL(text.size(), converted.length());
        size_t index = 0;
        while (index < text.size()) {
            BOOST_CHECK_EQUAL(converted[index], GameStringChar(text[index]));
            ++index;
        }
    }
}
BOOST_AUTO_TEST_SUITE_END()

View File

@ -1,6 +1,6 @@
#include <boost/test/unit_test.hpp>
#include <data/GameTexts.hpp>
#include <engine/ScreenText.hpp>
#include <fonts/GameTexts.hpp>
#include <loaders/LoaderGXT.hpp>
#include "test_Globals.hpp"
@ -23,6 +23,14 @@ BOOST_AUTO_TEST_CASE(load_test) {
}
}
/* A newline passes through T() as a single, unchanged character. */
BOOST_AUTO_TEST_CASE(special_chars) {
    auto newline = T("\n");
    BOOST_CHECK_EQUAL(newline.size(), 1);
    BOOST_CHECK_EQUAL(newline[0], '\n');
}
BOOST_AUTO_TEST_CASE(big_test) {
// Check that makeBig creates a text in the right place
{