rwengine: add per-font mapping + unicode support + tests

2024-11-22 02:12:45 +01:00 · 2017-09-07 18:44:21 +02:00 · 2017-09-07 18:44:21 +02:00 · 2ad9667b50
commit 2ad9667b50
parent 74a4398e69
19 changed files with 1055 additions and 17 deletions
--- a/rwengine/CMakeLists.txt
+++ b/rwengine/CMakeLists.txt
@ -37,8 +37,6 @@ set(RWENGINE_SOURCES
    src/data/Chase.hpp
    src/data/CollisionModel.hpp
    src/data/CutsceneData.hpp
-    src/data/GameTexts.cpp
-    src/data/GameTexts.hpp
    src/data/InstanceData.hpp
    src/data/ModelData.cpp
    src/data/ModelData.hpp
--- a/rwengine/src/engine/GameData.hpp
+++ b/rwengine/src/engine/GameData.hpp
@ -18,11 +18,11 @@
 #include <rw/forward.hpp>

 #include <data/AnimGroup.hpp>
-#include <data/GameTexts.hpp>
 #include <data/ModelData.hpp>
 #include <data/PedData.hpp>
 #include <data/Weather.hpp>
 #include <data/ZoneData.hpp>
+#include <fonts/GameTexts.hpp>
 #include <loaders/LoaderDFF.hpp>
 #include <loaders/LoaderIMG.hpp>
 #include <loaders/LoaderTXD.hpp>
--- a/rwengine/src/engine/GameState.hpp
+++ b/rwengine/src/engine/GameState.hpp
@ -9,15 +9,13 @@
 #include <glm/glm.hpp>
 #include <glm/gtc/quaternion.hpp>

-#include <data/GameTexts.hpp>
 #include <data/VehicleGenerator.hpp>
 #include <engine/GameData.hpp>
 #include <engine/GameInputState.hpp>
-
 #include <engine/GameWorld.hpp>
 #include <engine/ScreenText.hpp>
+#include <fonts/GameTexts.hpp>
 #include <objects/ObjectTypes.hpp>
-
 #include <script/ScriptTypes.hpp>

 class GameWorld;
--- a/rwengine/src/engine/ScreenText.hpp
+++ b/rwengine/src/engine/ScreenText.hpp
@ -8,7 +8,7 @@

 #include <glm/glm.hpp>

-#include <data/GameTexts.hpp>
+#include <fonts/GameTexts.hpp>

 enum class ScreenTextType {
    /// Big text will be rendered according to the proscribed style.
--- a/rwengine/src/loaders/LoaderGXT.cpp
+++ b/rwengine/src/loaders/LoaderGXT.cpp
@ -5,10 +5,9 @@
 #include <string>
 #include <utility>

+#include <fonts/GameTexts.hpp>
 #include <platform/FileHandle.hpp>

-#include "data/GameTexts.hpp"
-
 void LoaderGXT::load(GameTexts &texts, const FileHandle &file) {
    auto data = file->data;

--- a/rwengine/src/render/TextRenderer.hpp
+++ b/rwengine/src/render/TextRenderer.hpp
@ -11,7 +11,7 @@
 #include <gl/DrawBuffer.hpp>
 #include <gl/GeometryBuffer.hpp>

-#include <data/GameTexts.hpp>
+#include <fonts/GameTexts.hpp>
 #include <render/OpenGLRenderer.hpp>

 #define GAME_FONTS 3
--- a/rwengine/src/script/ScriptFunctions.hpp
+++ b/rwengine/src/script/ScriptFunctions.hpp
@ -4,11 +4,11 @@
 #include <rw/debug.hpp>

 #include <ai/AIGraphNode.hpp>
-#include <data/GameTexts.hpp>
 #include <data/ModelData.hpp>
 #include <engine/GameData.hpp>
 #include <engine/GameState.hpp>
 #include <engine/GameWorld.hpp>
+#include <fonts/GameTexts.hpp>
 #include <objects/GameObject.hpp>
 #include <objects/CharacterObject.hpp>
 #include <objects/VehicleObject.hpp>
--- a/rwlib/CMakeLists.txt
+++ b/rwlib/CMakeLists.txt
@ -27,6 +27,15 @@ SET(RWLIB_SOURCES
    source/data/Clump.hpp
    source/data/Clump.cpp

+    source/fonts/FontMap.cpp
+    source/fonts/FontMap.hpp
+    source/fonts/FontMapGta3.cpp
+    source/fonts/FontMapGta3.hpp
+    source/fonts/GameTexts.cpp
+    source/fonts/GameTexts.hpp
+    source/fonts/Unicode.cpp
+    source/fonts/Unicode.hpp
+
    source/loaders/LoaderIMG.hpp
    source/loaders/LoaderIMG.cpp
    source/loaders/RWBinaryStream.hpp
--- a/rwlib/source/fonts/FontMap.cpp
+++ b/rwlib/source/fonts/FontMap.cpp
@ -0,0 +1,79 @@
+#include "FontMap.hpp"
+
+#include <rw/debug.hpp>
+
+#include <sstream>
+
+/**
+ * Use output operations to create GameStrings (only allows write operations)
+ */
+using OGameStringStream = std::basic_ostringstream<GameStringChar>;
+
+/**
+ * Build both lookup directions from the given source maps and select the
+ * GameStringChar that stands in for unicode points the font cannot show.
+ */
+FontMap::FontMap(std::initializer_list<std::reference_wrapper<const gschar_unicode_map_t>> maps) {
+    for (const gschar_unicode_map_t &source : maps) {
+        /* insert() keeps the first unicode point seen for a GameStringChar. */
+        m_to_unicode.insert(source.cbegin(), source.cend());
+        /* operator[] lets a later entry replace an earlier one. */
+        for (const auto &entry : source) {
+            m_from_unicode[entry.second] = entry.first;
+        }
+    }
+    const auto question_mark = m_from_unicode.find(UNICODE_QUESTION_MARK);
+    if (question_mark != m_from_unicode.end()) {
+        m_unknown_gschar = question_mark->second;
+    } else {
+        RW_ERROR("Font does not have a question mark");
+        m_unknown_gschar = ' ';
+    }
+}
+
+/**
+ * Translate a unicode point into this font's GameStringChar.
+ * Values below 0x20 (control characters) are returned unchanged; unicode
+ * points without a mapping yield the font's "unknown" character.
+ */
+GameStringChar FontMap::to_GameStringChar(unicode_t u) const {
+    const bool is_control = u < 0x20;
+    if (is_control) {
+        return u;
+    }
+    const auto found = m_from_unicode.find(u);
+    return found != m_from_unicode.end() ? found->second : m_unknown_gschar;
+}
+
+/**
+ * Translate a GameStringChar of this font into a unicode point.
+ * Values below 0x20 (control characters) are returned unchanged; characters
+ * without a mapping yield UNICODE_REPLACEMENT_CHARACTER.
+ */
+unicode_t FontMap::to_unicode(GameStringChar c) const {
+    if (c >= 0x20) {
+        const auto found = m_to_unicode.find(c);
+        if (found != m_to_unicode.end()) {
+            return found->second;
+        }
+        return UNICODE_REPLACEMENT_CHARACTER;
+    }
+    return c;
+}
+
+/**
+ * Convert a GameString to UTF-8 by mapping every character to its unicode
+ * point and concatenating the encoded bytes.
+ */
+std::string FontMap::to_string(const GameString &s) const {
+    std::string utf8;
+    char encoded[4];
+    for (const GameStringChar gschar : s) {
+        const auto length = unicode_to_utf8(to_unicode(gschar), encoded);
+        utf8.append(encoded, length);
+    }
+    return utf8;
+}
+
+/**
+ * Convert a UTF-8 encoded string to a GameString of this font.
+ * Every decoded unicode point is mapped with to_GameStringChar(), so points
+ * the font does not provide become the font's "unknown" character.
+ */
+GameString FontMap::to_GameString(const std::string &utf8) const {
+    /* Append straight to the result: a std::basic_ostringstream over the
+     * 16-bit GameStringChar is a stream of a non-character type, which adds
+     * nothing here beyond concatenation. */
+    GameString result;
+    std::istringstream iss(utf8);
+    for (Utf8UnicodeIterator it(iss); it.good(); ++it) {
+        result.push_back(to_GameStringChar(it.unicode()));
+    }
+    return result;
+}
+
+/** First entry of the GameStringChar to unicode table. */
+FontMap::gschar_unicode_iterator FontMap::to_unicode_begin() const {
+    /* In a const member function begin() already yields a const_iterator. */
+    return m_to_unicode.begin();
+}
+
+/** Past-the-end position of the GameStringChar to unicode table. */
+FontMap::gschar_unicode_iterator FontMap::to_unicode_end() const {
+    /* In a const member function end() already yields a const_iterator. */
+    return m_to_unicode.end();
+}
--- a/rwlib/source/fonts/FontMap.hpp
+++ b/rwlib/source/fonts/FontMap.hpp
@ -0,0 +1,95 @@
+#ifndef _RWLIB_FONTS_FONTMAP_HPP_
+#define _RWLIB_FONTS_FONTMAP_HPP_
+
+#include <functional>
+#include <initializer_list>
+#include <map>
+
+#include "GameTexts.hpp"
+#include "Unicode.hpp"
+
+/**
+ * @brief Class providing mapping from unicode chars to game strings and vice versa.
+ * The conversions of an object of this class depend on the actual font used.
+ * A FontMap is built from one or more GameStringChar to unicode maps.
+ */
+class FontMap {
+public:
+    /**
+     * Mapping from GameStringChar to unicode data point.
+     */
+    using gschar_unicode_map_t = std::map<GameStringChar, unicode_t>;
+
+    /**
+     * Mapping from unicode data point to GameStringChar.
+     */
+    using unicode_gschar_map_t = std::map<unicode_t, GameStringChar>;
+
+    /**
+     * Iterator type over all GameStringChar to unicode.
+     */
+    using gschar_unicode_iterator = gschar_unicode_map_t::const_iterator;
+
+    /**
+     * @brief FontMap Create a new font mapping using the maps provided.
+     * @param maps List of mappings used as source for this font mapping.
+     */
+    FontMap(std::initializer_list<std::reference_wrapper<const gschar_unicode_map_t>> maps);
+
+    /**
+     * @brief to_GameStringChar Convert a unicode data point to a GameStringChar.
+     * @param u The unicode character.
+     * @return  A GameStringChar
+     */
+    GameStringChar to_GameStringChar(unicode_t u) const;
+
+    /**
+     * @brief to_unicode Convert a GameStringChar to a unicode data point.
+     * @param c The GameStringChar
+     * @return A unicode character.
+     */
+    unicode_t to_unicode(GameStringChar c) const;
+
+    /**
+     * @brief to_string Convert a GameString to a utf-8 encoded string.
+     * @param s The GameString.
+     * @return A utf-8 encoded string.
+     */
+    std::string to_string(const GameString &s) const;
+
+    /**
+     * @brief to_GameString Convert a utf-8 encoded string to a GameString.
+     * @param utf8 The utf-8 encoded string.
+     * @return  A GameString.
+     */
+    GameString to_GameString(const std::string &utf8) const;
+
+    /**
+     * @brief to_unicode_begin Iterate over the GameStringChar to unicode begin.
+     * @return Iterator to begin.
+     */
+    gschar_unicode_iterator to_unicode_begin() const;
+
+    /**
+     * @brief to_unicode_end Iterate over the GameStringChar to unicode end.
+     * @return Iterator to end.
+     */
+    gschar_unicode_iterator to_unicode_end() const;
+private:
+    /**
+     * Mapping from a unicode point to a GameStringChar.
+     */
+    unicode_gschar_map_t m_from_unicode;
+
+    /**
+     * Mapping from a GameStringChar to a unicode point.
+     */
+    gschar_unicode_map_t m_to_unicode;
+
+    /**
+     * GameStringChar used if a unicode point has no corresponding GameStringChar.
+     */
+    GameStringChar m_unknown_gschar;
+};
+
+#endif
--- a/rwlib/source/fonts/FontMapGta3.cpp
+++ b/rwlib/source/fonts/FontMapGta3.cpp
@ -0,0 +1,208 @@
+#include "FontMapGta3.hpp"
+
+static const FontMap::gschar_unicode_map_t map_gta3_font_common = { /* Entries used by all three GTA3 font mappings. */
+    {0x20, UNICODE_SPACE},
+    {0x21, UNICODE_EXCLAMATION_MARK},
+    {0x24, UNICODE_DOLLAR_SIGN},
+    {0x25, UNICODE_PROCENT_SIGN},
+    {0x26, UNICODE_AMPERSAND},
+    {0x27, UNICODE_APOSTROPHE},
+    {0x28, UNICODE_LEFT_PARENTHESIS},
+    {0x29, UNICODE_RIGHT_PARENTHESIS},
+    {0x2c, UNICODE_COMMA},
+    {0x2d, UNICODE_HYPHEN_MINUS},
+    {0x2e, UNICODE_FULL_STOP},
+    {0x30, UNICODE_DIGIT_0},
+    {0x31, UNICODE_DIGIT_1},
+    {0x32, UNICODE_DIGIT_2},
+    {0x33, UNICODE_DIGIT_3},
+    {0x34, UNICODE_DIGIT_4},
+    {0x35, UNICODE_DIGIT_5},
+    {0x36, UNICODE_DIGIT_6},
+    {0x37, UNICODE_DIGIT_7},
+    {0x38, UNICODE_DIGIT_8},
+    {0x39, UNICODE_DIGIT_9},
+    {0x3a, UNICODE_COLON},
+    {0x3f, UNICODE_QUESTION_MARK},
+    {0x41, UNICODE_CAPITAL_A},
+    {0x42, UNICODE_CAPITAL_B},
+    {0x43, UNICODE_CAPITAL_C},
+    {0x44, UNICODE_CAPITAL_D},
+    {0x45, UNICODE_CAPITAL_E},
+    {0x46, UNICODE_CAPITAL_F},
+    {0x47, UNICODE_CAPITAL_G},
+    {0x48, UNICODE_CAPITAL_H},
+    {0x49, UNICODE_CAPITAL_I},
+    {0x4a, UNICODE_CAPITAL_J},
+    {0x4b, UNICODE_CAPITAL_K},
+    {0x4c, UNICODE_CAPITAL_L},
+    {0x4d, UNICODE_CAPITAL_M},
+    {0x4e, UNICODE_CAPITAL_N},
+    {0x4f, UNICODE_CAPITAL_O},
+    {0x50, UNICODE_CAPITAL_P},
+    {0x51, UNICODE_CAPITAL_Q},
+    {0x52, UNICODE_CAPITAL_R},
+    {0x53, UNICODE_CAPITAL_S},
+    {0x54, UNICODE_CAPITAL_T},
+    {0x55, UNICODE_CAPITAL_U},
+    {0x56, UNICODE_CAPITAL_V},
+    {0x57, UNICODE_CAPITAL_W},
+    {0x58, UNICODE_CAPITAL_X},
+    {0x59, UNICODE_CAPITAL_Y},
+    {0x5a, UNICODE_CAPITAL_Z},
+    {0x61, UNICODE_SMALL_A},
+    {0x62, UNICODE_SMALL_B},
+    {0x63, UNICODE_SMALL_C},
+    {0x64, UNICODE_SMALL_D},
+    {0x65, UNICODE_SMALL_E},
+    {0x66, UNICODE_SMALL_F},
+    {0x67, UNICODE_SMALL_G},
+    {0x68, UNICODE_SMALL_H},
+    {0x69, UNICODE_SMALL_I},
+    {0x6a, UNICODE_SMALL_J},
+    {0x6b, UNICODE_SMALL_K},
+    {0x6c, UNICODE_SMALL_L},
+    {0x6d, UNICODE_SMALL_M},
+    {0x6e, UNICODE_SMALL_N},
+    {0x6f, UNICODE_SMALL_O},
+    {0x70, UNICODE_SMALL_P},
+    {0x71, UNICODE_SMALL_Q},
+    {0x72, UNICODE_SMALL_R},
+    {0x73, UNICODE_SMALL_S},
+    {0x74, UNICODE_SMALL_T},
+    {0x75, UNICODE_SMALL_U},
+    {0x76, UNICODE_SMALL_V},
+    {0x77, UNICODE_SMALL_W},
+    {0x78, UNICODE_SMALL_X},
+    {0x79, UNICODE_SMALL_Y},
+    {0x7a, UNICODE_SMALL_Z},
+    {0x80, UNICODE_CAPITAL_A_GRAVE},
+    {0x81, UNICODE_CAPITAL_A_ACUTE},
+    {0x82, UNICODE_CAPITAL_A_CIRCUMFLEX},
+    {0x83, UNICODE_CAPITAL_A_DIARESIS},
+    {0x84, UNICODE_CAPITAL_AE},
+    {0x85, UNICODE_CAPITAL_C_CEDILLA},
+    {0x86, UNICODE_CAPITAL_E_GRAVE},
+    {0x87, UNICODE_CAPITAL_E_ACUTE},
+    {0x88, UNICODE_CAPITAL_E_CIRCUMFLEX},
+    {0x89, UNICODE_CAPITAL_E_DIARESIS},
+    {0x8a, UNICODE_CAPITAL_I_GRAVE},
+    {0x8b, UNICODE_CAPITAL_I_ACUTE},
+    {0x8c, UNICODE_CAPITAL_I_CIRCUMFLEX},
+    {0x8d, UNICODE_CAPITAL_I_DIARESIS},
+    {0x8e, UNICODE_CAPITAL_O_GRAVE},
+    {0x8f, UNICODE_CAPITAL_O_ACUTE},
+    {0x90, UNICODE_CAPITAL_O_CIRCUMFLEX},
+    {0x91, UNICODE_CAPITAL_O_DIARESIS},
+    {0x92, UNICODE_CAPITAL_U_GRAVE},
+    {0x93, UNICODE_CAPITAL_U_ACUTE},
+    {0x94, UNICODE_CAPITAL_U_CIRCUMFLEX},
+    {0x95, UNICODE_CAPITAL_U_DIARESIS},
+    {0x96, UNICODE_SMALL_SHARP_S},
+    {0x97, UNICODE_SMALL_A_GRAVE},
+    {0x98, UNICODE_SMALL_A_ACUTE},
+    {0x99, UNICODE_SMALL_A_CIRCUMFLEX},
+    {0x9a, UNICODE_SMALL_A_DIARESIS},
+    {0x9b, UNICODE_SMALL_AE},
+    {0x9c, UNICODE_SMALL_C_CEDILLA},
+    {0x9d, UNICODE_SMALL_E_GRAVE},
+    {0x9e, UNICODE_SMALL_E_ACUTE},
+    {0x9f, UNICODE_SMALL_E_CIRCUMFLEX},
+    {0xa0, UNICODE_SMALL_E_DIARESIS},
+    {0xa1, UNICODE_SMALL_I_GRAVE},
+    {0xa2, UNICODE_SMALL_I_ACUTE},
+    {0xa3, UNICODE_SMALL_I_CIRCUMFLEX},
+    {0xa4, UNICODE_SMALL_I_DIARESIS},
+    {0xa5, UNICODE_SMALL_O_GRAVE},
+    {0xa6, UNICODE_SMALL_O_ACUTE},
+    {0xa7, UNICODE_SMALL_O_CIRCUMFLEX},
+    {0xa8, UNICODE_SMALL_O_DIARESIS},
+    {0xa9, UNICODE_SMALL_U_GRAVE},
+    {0xaa, UNICODE_SMALL_U_ACUTE},
+    {0xab, UNICODE_SMALL_U_CIRCUMFLEX},
+    {0xac, UNICODE_SMALL_U_DIARESIS},
+    {0xad, UNICODE_CAPITAL_N_TILDE},
+    {0xae, UNICODE_SMALL_N_TILDE},
+    {0xaf, UNICODE_INVERTED_QUESTION_MARK},
+    {0xb0, UNICODE_INVERTED_EXCLAMATION_MARK}
+};
+
+static const FontMap::gschar_unicode_map_t map_gta3_font_0_priv = { /* Entries only used by the font 0 mapping; commented-out codes have no unicode point assigned. */
+    {0x22, UNICODE_QUOTATION_MARK},
+    {0x23, UNICODE_NUMBER_SIGN},
+    {0x2a, UNICODE_HYPHEN_MINUS}, /* NOTE(review): the common map already maps 0x2d to HYPHEN_MINUS, so for font 0 '-' converts to 0x2a; confirm this is intended */
+    /*{0x2b, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x2f, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x3b, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x3c, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x3d, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x3e, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x40, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x5b, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x5c, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x5d, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x5e, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x5f, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x60, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x7b, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x7c, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x7d, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x7f, UNICODE_REPLACEMENT_CHARACTER},*/
+};
+
+static const FontMap::gschar_unicode_map_t map_gta3_font_1_priv = { /* Entries only used by the font 1 mapping; commented-out codes have no unicode point assigned. */
+    {0x22, UNICODE_INCREMENT},
+    {0x23, UNICODE_REGISTERED_SIGN},
+    /*{0x2a, UNICODE_REPLACEMENT_CHARACTER},*/
+    {0x2b, UNICODE_PLUS_SIGN},
+    {0x2f, UNICODE_MULTIPLICATION_SIGN},
+    {0x3b, UNICODE_BLACK_UP_POINTING_TRIANGLE},
+    {0x3c, UNICODE_BLACK_LEFT_POINTING_POINTER},
+    {0x3d, UNICODE_BLACK_DOWN_POINTING_POINTER},
+    {0x3e, UNICODE_BLACK_RIGHT_POINTING_POINTER},
+    {0x40, UNICODE_TRADE_MARK},
+    {0x5b, UNICODE_SHIELD},
+    {0x5c, UNICODE_REVERSE_SOLIDUS},
+    {0x5d, UNICODE_BLACK_STAR},
+    {0x5e, UNICODE_NUMERO_SIGN},
+    {0x5f, UNICODE_DEGREES},
+    {0x60, UNICODE_COPYRIGHT_SIGN},
+    {0x7b, UNICODE_BLACK_HEART_SUIT},
+    {0x7c, UNICODE_WHITE_CIRCLE},
+    /*{0x7d, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x7f, UNICODE_REPLACEMENT_CHARACTER},*/
+};
+
+static const FontMap::gschar_unicode_map_t map_gta3_font_2_priv = { /* Entries only used by the font 2 mapping; commented-out codes have no unicode point assigned. */
+    {0x22, UNICODE_INCREMENT},
+    {0x23, UNICODE_NUMBER_SIGN},
+    {0x2a, UNICODE_ASTERISK},
+    {0x2b, UNICODE_PLUS_SIGN},
+    {0x2f, UNICODE_SOLIDUS},
+    {0x3b, UNICODE_SEMICOLON},
+    {0x3c, UNICODE_BLACK_LEFT_POINTING_POINTER},
+    {0x3d, UNICODE_EQUALS_SIGN},
+    {0x3e, UNICODE_BLACK_RIGHT_POINTING_POINTER},
+    {0x40, UNICODE_TRADE_MARK},
+    {0x5b, UNICODE_LEFT_SQUARE_BRACKET},
+    {0x5c, UNICODE_REVERSE_SOLIDUS},
+    {0x5d, UNICODE_RIGHT_SQUARE_BRACKET},
+    {0x5e, UNICODE_CIRCUMFLEX_ACCENT},
+    {0x5f, UNICODE_DEGREES},
+    {0x60, UNICODE_GRAVE_ACCENT},
+    /*{0x7b, UNICODE_REPLACEMENT_CHARACTER},*/
+    {0x7c, UNICODE_WHITE_CIRCLE},
+    /*{0x7d, UNICODE_REPLACEMENT_CHARACTER},*/
+    /*{0x7f, UNICODE_REPLACEMENT_CHARACTER},*/
+    {0xb1, UNICODE_ACUTE_ACCENT},
+};
+
+/* One mapping per font: the shared entries combined with that font's own. */
+const FontMap map_gta3_font0({map_gta3_font_common, map_gta3_font_0_priv});
+const FontMap map_gta3_font1({map_gta3_font_common, map_gta3_font_1_priv});
+const FontMap map_gta3_font2({map_gta3_font_common, map_gta3_font_2_priv});
+
+/* The same three mappings, indexable by font number. They are defined above
+ * in this translation unit, so they are initialized before being copied. */
+const std::array<FontMap, 3> maps_gta3_font = {{
+    map_gta3_font0,
+    map_gta3_font1,
+    map_gta3_font2,
+}};
--- a/rwlib/source/fonts/FontMapGta3.hpp
+++ b/rwlib/source/fonts/FontMapGta3.hpp
@ -0,0 +1,28 @@
+#ifndef _RWLIB_FONTS_FONTMAPGTA3_HPP_
+#define _RWLIB_FONTS_FONTMAPGTA3_HPP_
+
+#include "FontMap.hpp"
+
+#include <array>
+
+/**
+ * Font mapping of font style 0: Pager
+ */
+extern const FontMap map_gta3_font0;
+
+/**
+ * Font mapping of font style 1: Pricedown
+ */
+extern const FontMap map_gta3_font1;
+
+/**
+ * Font mapping of font style 2: Arial
+ */
+extern const FontMap map_gta3_font2;
+
+/**
+ * Array of all font mappings.
+ */
+extern const std::array<FontMap, 3> maps_gta3_font;
+
+#endif
--- a/rwlib/source/fonts/GameTexts.cpp
+++ b/rwlib/source/fonts/GameTexts.cpp
--- a/rwlib/source/fonts/GameTexts.hpp
+++ b/rwlib/source/fonts/GameTexts.hpp
@ -1,5 +1,6 @@
-#ifndef _RWENGINE_GAMETEXTS_HPP_
-#define _RWENGINE_GAMETEXTS_HPP_
+#ifndef _RWLIB_FONTS_GAMETEXTS_HPP_
+#define _RWLIB_FONTS_GAMETEXTS_HPP_
+
 #include <cstdint>
 #include <string>
 #include <unordered_map>
@ -7,12 +8,14 @@
 /**
 * Each GXT char is just a 16-bit index into the font map.
 */
-using GameStringChar = uint16_t;
+using GameStringChar = std::uint16_t;
+
 /**
 * The game stores strings as 16-bit indexes into the font
 * texture, which is something simllar to ASCII.
 */
 using GameString = std::basic_string<GameStringChar>;
+
 /**
 * GXT keys are just 8 single byte chars.
 * Keys are small so should be subject to SSO
@ -32,8 +35,7 @@ GameString fromString(const std::string& str);
 /**
 * Since the encoding of symbols is arbitrary, these constants should be used in
 * hard-coded strings containing symbols outside of the ASCII-subset supported
- * by
- * all fonts
+ * by all fonts
 */
 namespace GameSymbols {
 static constexpr GameStringChar Money = '$';
--- a/rwlib/source/fonts/Unicode.cpp
+++ b/rwlib/source/fonts/Unicode.cpp
@ -0,0 +1,110 @@
+#include "Unicode.hpp"
+
+#include <istream>
+
+/**
+ * Encode a unicode point as UTF-8 (not zero terminated).
+ * Values that cannot be represented in UTF-8 are encoded as
+ * UNICODE_REPLACEMENT_CHARACTER instead.
+ * @param unicode The unicode point to encode.
+ * @param c Buffer of at least 4 bytes receiving the encoded bytes.
+ * @return Number of bytes written to c (1 to 4).
+ */
+size_t unicode_to_utf8(unicode_t unicode, char c[4]) {
+    /* Surrogates (U+D800..U+DFFF) are reserved for UTF-16 and values from
+     * 0x110000 upwards lie outside unicode: UTF-8 must not contain either. */
+    const bool is_surrogate = unicode >= 0xd800 && unicode <= 0xdfff;
+    if (is_surrogate || unicode >= 0x110000) {
+        unicode = UNICODE_REPLACEMENT_CHARACTER;
+    }
+    if (unicode < 0x80) { // 7 bits
+        c[0] = static_cast<char>(unicode);
+        return 1;
+    }
+    if (unicode < 0x800) { // 11 bits
+        c[0] = static_cast<char>(0xc0 | (unicode >> 6));
+        c[1] = static_cast<char>(0x80 | (unicode & 0x3f));
+        return 2;
+    }
+    if (unicode < 0x10000) { // 16 bits
+        c[0] = static_cast<char>(0xe0 | (unicode >> 12));
+        c[1] = static_cast<char>(0x80 | ((unicode >> 6) & 0x3f));
+        c[2] = static_cast<char>(0x80 | (unicode & 0x3f));
+        return 3;
+    }
+    // 21 bits
+    c[0] = static_cast<char>(0xf0 | (unicode >> 18));
+    c[1] = static_cast<char>(0x80 | ((unicode >> 12) & 0x3f));
+    c[2] = static_cast<char>(0x80 | ((unicode >> 6) & 0x3f));
+    c[3] = static_cast<char>(0x80 | (unicode & 0x3f));
+    return 4;
+}
+
+/**
+ * Create an iterator without a stream; it is never good.
+ * m_unicode is given a defined value so that reading it is harmless.
+ */
+Utf8UnicodeIterator::Utf8UnicodeIterator()
+    : m_is(nullptr), m_finished(true), m_unicode(UNICODE_REPLACEMENT_CHARACTER) {
+}
+
+/**
+ * Create an iterator over the UTF-8 bytes of is and decode the first point.
+ * m_unicode gets a defined value up front because next_unicode() leaves it
+ * untouched when the stream is already at its end.
+ */
+Utf8UnicodeIterator::Utf8UnicodeIterator(std::istream &is)
+    : m_is(&is), m_finished(false), m_unicode(UNICODE_REPLACEMENT_CHARACTER) {
+    next_unicode();
+}
+
+/**
+ * Decode the next UTF-8 sequence of the stream into m_unicode.
+ * Sets m_finished when no byte is left. An invalid lead byte, or a sequence
+ * that is cut short or continued by a wrong byte, decodes to
+ * UNICODE_REPLACEMENT_CHARACTER.
+ * NOTE: overlong encodings and decoded values above 0x10ffff are not rejected.
+ */
+void Utf8UnicodeIterator::next_unicode() {
+    const int lead = m_is->get();
+    if (lead == EOF) {
+        m_finished = true;
+        return;
+    }
+    unicode_t unicode;
+    unsigned nb_bytes; /* continuation bytes still expected */
+    if ((lead & 0x80) == 0x00) {
+        unicode = lead;
+        nb_bytes = 0;
+    } else if ((lead & 0xe0) == 0xc0) {
+        unicode = lead & 0x1f;
+        nb_bytes = 1;
+    } else if ((lead & 0xf0) == 0xe0) {
+        unicode = lead & 0x0f;
+        nb_bytes = 2;
+    } else if ((lead & 0xf8) == 0xf0) {
+        unicode = lead & 0x07;
+        nb_bytes = 3;
+    } else {
+        unicode = UNICODE_REPLACEMENT_CHARACTER;
+        nb_bytes = 0;
+    }
+    while (nb_bytes != 0) {
+        /* Look before consuming: a byte that does not continue this sequence
+         * starts the next one and must stay in the stream. At end of input
+         * the replacement character is still delivered; the following call
+         * then sees EOF and marks the iterator as finished. */
+        const int next = m_is->peek();
+        if (next == EOF || (next & 0xc0) != 0x80) {
+            unicode = UNICODE_REPLACEMENT_CHARACTER;
+            break;
+        }
+        m_is->get();
+        unicode = (unicode << 6) | (next & 0x3f);
+        --nb_bytes;
+    }
+    m_unicode = unicode;
+}
+
+Utf8UnicodeIterator &Utf8UnicodeIterator::operator ++() {
+    next_unicode();
+    return *this;
+}
+
+unicode_t Utf8UnicodeIterator::unicode() const {
+    return m_unicode;
+}
+
+unicode_t Utf8UnicodeIterator::operator *() const {
+    return m_unicode;
+}
+
+bool Utf8UnicodeIterator::good() const {
+    return !m_finished;
+}
+
+Utf8UnicodeIteratorWrapper::Utf8UnicodeIteratorWrapper(const std::string &s)
+    : iss(s) {
+}
+
+Utf8UnicodeIterator Utf8UnicodeIteratorWrapper::begin() {
+    return Utf8UnicodeIterator(iss);
+}
+
+Utf8UnicodeIterator Utf8UnicodeIteratorWrapper::end() {
+    return Utf8UnicodeIterator();
+}
+
+bool Utf8UnicodeIterator::operator !=(const Utf8UnicodeIterator &) {
+    return good();
+}
--- a/rwlib/source/fonts/Unicode.hpp
+++ b/rwlib/source/fonts/Unicode.hpp
@ -0,0 +1,303 @@
+#ifndef _RWLIB_FONTS_UNICODE_HPP_
+#define _RWLIB_FONTS_UNICODE_HPP_
+
+#include <cstdint>
+#include <iosfwd>
+#include <sstream>
+
+/**
+ * unicode_t represent a unicode data point. (UTF-32)
+ */
+using unicode_t = char32_t;
+
+/**
+ * @brief unicode_to_utf8 Encode a unicode data point to a (non-zero terminated) utf-8 string.
+ * @param unicode The unicode data point to convert
+ * @param c buffer to write the utf-8 data to
+ * @return number of bytes written
+ */
+size_t unicode_to_utf8(unicode_t unicode, char c[4]);
+
+/**
+ * @brief Iterate over a utf8 string stream. Output unicode data points.
+ */
+class Utf8UnicodeIterator {
+private:
+    /**
+     * @brief m_is Pointer to the utf8 stream to iterate over.
+     */
+    std::istream *m_is;
+
+    /**
+     * @brief m_finished true if the stream is finished/invalid.
+     */
+    bool m_finished;
+
+    /**
+     * @brief m_unicode Current unicode point.
+     */
+    unicode_t m_unicode;
+
+    /**
+     * @brief next_unicode Move to the next unicode point.
+     */
+    void next_unicode();
+
+public:
+    /**
+     * @brief Create an empty unicode iterator. The iterator is not good.
+     */
+    Utf8UnicodeIterator();
+    /**
+     * @brief Create a unicode iterator that iterates over a utf8 stream.
+     * @param is utf8 stream
+     */
+    Utf8UnicodeIterator(std::istream &is);
+
+    /**
+     * @brief operator ++ Move to the next unicode character.
+     * Only call this function when the iterator is good.
+     * @return this object
+     */
+    Utf8UnicodeIterator &operator ++();
+    /**
+     * @brief operator != Returns true if this iterator is good (the argument is ignored)
+     * @return true if this iterator is good
+     */
+    bool operator!=(const Utf8UnicodeIterator &);
+
+    /**
+     * @brief operator * Returns the current unicode point
+     * Only call this function when the iterator is good.
+     * @return unicode point
+     */
+    unicode_t operator *() const;
+
+    /**
+     * @brief unicode Returns the current unicode point
+     * Only call this function when the iterator is good.
+     * @return unicode point
+     */
+    unicode_t unicode() const;
+
+    /**
+     * @brief good Checks whether this stream is good.
+     * A stream is good when a unicode point is available.
+     * @return true if the stream is good, else false.
+     */
+    bool good() const;
+};
+
+/**
+ * @brief This class lets you iterate over a utf8 string using a range-based for loop
+ */
+class Utf8UnicodeIteratorWrapper {
+public:
+    /**
+     * @brief Create a new wrapper.
+     * @param s The string that the utf8 to Unicode iterator should iterate over
+     */
+    Utf8UnicodeIteratorWrapper(const std::string &s);
+
+    /**
+     * @brief Return the iterator at the start. This one returns unicode points.
+     * @return Utf8UnicodeIterator which will read the unicode points.
+     */
+    Utf8UnicodeIterator begin();
+    /**
+     * @brief Return the unicode iterator at the end. This one is not good.
+     * No unicode points can be read from this object.
+     * @return Utf8UnicodeIterator Unicode iterator that is not good.
+     */
+    Utf8UnicodeIterator end();
+private:
+    /**
+     * @brief iss This utf8 stream holds the wrapped string and returns bytes to convert to unicode.
+     */
+    std::istringstream iss;
+};
+
+/**
+ * Unicode data points used by OpenRW.
+     */
+    enum UnicodeValue {
+    UNICODE_TAB                             = 0x09, /* '\t' */
+    UNICODE_NEW_LINE                        = 0x0a, /* '\n' */
+    UNICODE_CARRIAGE_RETURN                 = 0x0d, /* '\r' */
+
+    UNICODE_SPACE                           = 0x20, /* ' ' */
+    UNICODE_EXCLAMATION_MARK                = 0x21, /* '!' */
+    UNICODE_QUOTATION_MARK                  = 0x22, /* '"' */
+    UNICODE_NUMBER_SIGN                     = 0x23, /* '#' */
+    UNICODE_DOLLAR_SIGN                     = 0x24, /* '$' */
+    UNICODE_PROCENT_SIGN                    = 0x25, /* '%' */
+    UNICODE_AMPERSAND                       = 0x26, /* '&' */
+    UNICODE_APOSTROPHE                      = 0x27, /* ''' */
+    UNICODE_LEFT_PARENTHESIS                = 0x28, /* '(' */
+    UNICODE_RIGHT_PARENTHESIS               = 0x29, /* ')' */
+    UNICODE_ASTERISK                        = 0x2a, /* '*' */
+    UNICODE_PLUS_SIGN                       = 0x2b, /* '+' */
+    UNICODE_COMMA                           = 0x2c, /* ',' */
+    UNICODE_HYPHEN_MINUS                    = 0x2d, /* '-' */
+    UNICODE_FULL_STOP                       = 0x2e, /* '.' */
+    UNICODE_SOLIDUS                         = 0x2f, /* '/' */
+
+    UNICODE_DIGIT_0                         = 0x30, /* '0' */
+    UNICODE_DIGIT_1                         = 0x31, /* '1' */
+    UNICODE_DIGIT_2                         = 0x32, /* '2' */
+    UNICODE_DIGIT_3                         = 0x33, /* '3' */
+    UNICODE_DIGIT_4                         = 0x34, /* '4' */
+    UNICODE_DIGIT_5                         = 0x35, /* '5' */
+    UNICODE_DIGIT_6                         = 0x36, /* '6' */
+    UNICODE_DIGIT_7                         = 0x37, /* '7' */
+    UNICODE_DIGIT_8                         = 0x38, /* '8' */
+    UNICODE_DIGIT_9                         = 0x39, /* '9' */
+
+    UNICODE_COLON                           = 0x3a, /* ':' */
+    UNICODE_SEMICOLON                       = 0x3b, /* ';' */
+    UNICODE_LESS_THAN_SIGN                  = 0x3c, /* '<' */
+    UNICODE_EQUALS_SIGN                     = 0x3d, /* '=' */
+    UNICODE_GREATER_THAN_SIGN               = 0x3e, /* '>' */
+    UNICODE_QUESTION_MARK                   = 0x3f, /* '?' */
+
+    UNICODE_COMMERCIAL_AT                   = 0x40, /* '@' */
+    UNICODE_CAPITAL_A                       = 0x41, /* 'A' */
+    UNICODE_CAPITAL_B                       = 0x42, /* 'B' */
+    UNICODE_CAPITAL_C                       = 0x43, /* 'C' */
+    UNICODE_CAPITAL_D                       = 0x44, /* 'D' */
+    UNICODE_CAPITAL_E                       = 0x45, /* 'E' */
+    UNICODE_CAPITAL_F                       = 0x46, /* 'F' */
+    UNICODE_CAPITAL_G                       = 0x47, /* 'G' */
+    UNICODE_CAPITAL_H                       = 0x48, /* 'H' */
+    UNICODE_CAPITAL_I                       = 0x49, /* 'I' */
+    UNICODE_CAPITAL_J                       = 0x4a, /* 'J' */
+    UNICODE_CAPITAL_K                       = 0x4b, /* 'K' */
+    UNICODE_CAPITAL_L                       = 0x4c, /* 'L' */
+    UNICODE_CAPITAL_M                       = 0x4d, /* 'M' */
+    UNICODE_CAPITAL_N                       = 0x4e, /* 'N' */
+    UNICODE_CAPITAL_O                       = 0x4f, /* 'O' */
+    UNICODE_CAPITAL_P                       = 0x50, /* 'P' */
+    UNICODE_CAPITAL_Q                       = 0x51, /* 'Q' */
+    UNICODE_CAPITAL_R                       = 0x52, /* 'R' */
+    UNICODE_CAPITAL_S                       = 0x53, /* 'S' */
+    UNICODE_CAPITAL_T                       = 0x54, /* 'T' */
+    UNICODE_CAPITAL_U                       = 0x55, /* 'U' */
+    UNICODE_CAPITAL_V                       = 0x56, /* 'V' */
+    UNICODE_CAPITAL_W                       = 0x57, /* 'W' */
+    UNICODE_CAPITAL_X                       = 0x58, /* 'X' */
+    UNICODE_CAPITAL_Y                       = 0x59, /* 'Y' */
+    UNICODE_CAPITAL_Z                       = 0x5a, /* 'Z' */
+
+    UNICODE_LEFT_SQUARE_BRACKET             = 0x5b, /* '[' */
+    UNICODE_REVERSE_SOLIDUS                 = 0x5c, /* '\' */
+    UNICODE_RIGHT_SQUARE_BRACKET            = 0x5d, /* ']' */
+    UNICODE_CIRCUMFLEX_ACCENT               = 0x5e, /* '^' */
+    UNICODE_LOW_LINE                        = 0x5f, /* '_' */
+
+    UNICODE_GRAVE_ACCENT                    = 0x60, /* '`' */
+    UNICODE_SMALL_A                         = 0x61, /* 'a' */
+    UNICODE_SMALL_B                         = 0x62, /* 'b' */
+    UNICODE_SMALL_C                         = 0x63, /* 'c' */
+    UNICODE_SMALL_D                         = 0x64, /* 'd' */
+    UNICODE_SMALL_E                         = 0x65, /* 'e' */
+    UNICODE_SMALL_F                         = 0x66, /* 'f' */
+    UNICODE_SMALL_G                         = 0x67, /* 'g' */
+    UNICODE_SMALL_H                         = 0x68, /* 'h' */
+    UNICODE_SMALL_I                         = 0x69, /* 'i' */
+    UNICODE_SMALL_J                         = 0x6a, /* 'j' */
+    UNICODE_SMALL_K                         = 0x6b, /* 'k' */
+    UNICODE_SMALL_L                         = 0x6c, /* 'l' */
+    UNICODE_SMALL_M                         = 0x6d, /* 'm' */
+    UNICODE_SMALL_N                         = 0x6e, /* 'n' */
+    UNICODE_SMALL_O                         = 0x6f, /* 'o' */
+    UNICODE_SMALL_P                         = 0x70, /* 'p' */
+    UNICODE_SMALL_Q                         = 0x71, /* 'q' */
+    UNICODE_SMALL_R                         = 0x72, /* 'r' */
+    UNICODE_SMALL_S                         = 0x73, /* 's' */
+    UNICODE_SMALL_T                         = 0x74, /* 't' */
+    UNICODE_SMALL_U                         = 0x75, /* 'u' */
+    UNICODE_SMALL_V                         = 0x76, /* 'v' */
+    UNICODE_SMALL_W                         = 0x77, /* 'w' */
+    UNICODE_SMALL_X                         = 0x78, /* 'x' */
+    UNICODE_SMALL_Y                         = 0x79, /* 'y' */
+    UNICODE_SMALL_Z                         = 0x7a, /* 'z' */
+
+    UNICODE_LEFT_CURLY_BRACKET              = 0x7b, /* '{' */
+    UNICODE_VERTICAL_LINE                   = 0x7c, /* '|' */
+    UNICODE_RIGHT_CURLY_BRACKET             = 0x7d, /* '}' */
+    UNICODE_TILDE                           = 0x7e, /* '~' */
+
+    UNICODE_INVERTED_EXCLAMATION_MARK       = 0xa1, /* '¡' */
+    UNICODE_COPYRIGHT_SIGN                  = 0xa9, /* '©' */
+    UNICODE_REGISTERED_SIGN                 = 0xae, /* '®' */
+    UNICODE_DEGREES                         = 0xb0, /* '°' */
+    UNICODE_ACUTE_ACCENT                    = 0xb4, /* '´' */
+    UNICODE_INVERTED_QUESTION_MARK          = 0xbf, /* '¿' */
+
+    UNICODE_CAPITAL_A_GRAVE                 = 0xc0, /* 'À' */
+    UNICODE_CAPITAL_A_ACUTE                 = 0xc1, /* 'Á' */
+    UNICODE_CAPITAL_A_CIRCUMFLEX            = 0xc2, /* 'Â' */
+    UNICODE_CAPITAL_A_DIARESIS              = 0xc4, /* 'Ä' */
+    UNICODE_CAPITAL_AE                      = 0xc6, /* 'Æ' */
+    UNICODE_CAPITAL_C_CEDILLA               = 0xc7, /* 'Ç' */
+    UNICODE_CAPITAL_E_GRAVE                 = 0xc8, /* 'È' */
+    UNICODE_CAPITAL_E_ACUTE                 = 0xc9, /* 'É' */
+    UNICODE_CAPITAL_E_CIRCUMFLEX            = 0xca, /* 'Ê' */
+    UNICODE_CAPITAL_E_DIARESIS              = 0xcb, /* 'Ë' */
+    UNICODE_CAPITAL_I_GRAVE                 = 0xcc, /* 'Ì' */
+    UNICODE_CAPITAL_I_ACUTE                 = 0xcd, /* 'Í' */
+    UNICODE_CAPITAL_I_CIRCUMFLEX            = 0xce, /* 'Î' */
+    UNICODE_CAPITAL_I_DIARESIS              = 0xcf, /* 'Ï' */
+    UNICODE_CAPITAL_N_TILDE                 = 0xd1, /* 'Ñ' */
+    UNICODE_CAPITAL_O_GRAVE                 = 0xd2, /* 'Ò' */
+    UNICODE_CAPITAL_O_ACUTE                 = 0xd3, /* 'Ó' */
+    UNICODE_CAPITAL_O_CIRCUMFLEX            = 0xd4, /* 'Ô' */
+    UNICODE_CAPITAL_O_DIARESIS              = 0xd6, /* 'Ö' */
+    UNICODE_MULTIPLICATION_SIGN             = 0xd7, /* '×' */
+    UNICODE_CAPITAL_U_GRAVE                 = 0xd9, /* 'Ù' */
+    UNICODE_CAPITAL_U_ACUTE                 = 0xda, /* 'Ú' */
+    UNICODE_CAPITAL_U_CIRCUMFLEX            = 0xdb, /* 'Û' */
+    UNICODE_CAPITAL_U_DIARESIS              = 0xdc, /* 'Ü' */
+    UNICODE_SMALL_SHARP_S                   = 0xdf, /* 'ß' */
+
+    UNICODE_SMALL_A_GRAVE                   = 0xe0, /* 'à' */
+    UNICODE_SMALL_A_ACUTE                   = 0xe1, /* 'á' */
+    UNICODE_SMALL_A_CIRCUMFLEX              = 0xe2, /* 'â' */
+    UNICODE_SMALL_A_DIARESIS                = 0xe4, /* 'ä' */
+    UNICODE_SMALL_AE                        = 0xe6, /* 'æ' */
+    UNICODE_SMALL_C_CEDILLA                 = 0xe7, /* 'ç' */
+    UNICODE_SMALL_E_GRAVE                   = 0xe8, /* 'è' */
+    UNICODE_SMALL_E_ACUTE                   = 0xe9, /* 'é' */
+    UNICODE_SMALL_E_CIRCUMFLEX              = 0xea, /* 'ê' */
+    UNICODE_SMALL_E_DIARESIS                = 0xeb, /* 'ë' */
+    UNICODE_SMALL_I_GRAVE                   = 0xec, /* 'ì' */
+    UNICODE_SMALL_I_ACUTE                   = 0xed, /* 'í' */
+    UNICODE_SMALL_I_CIRCUMFLEX              = 0xee, /* 'î' */
+    UNICODE_SMALL_I_DIARESIS                = 0xef, /* 'ï' */
+    UNICODE_SMALL_N_TILDE                   = 0xf1, /* 'ñ' */
+    UNICODE_SMALL_O_GRAVE                   = 0xf2, /* 'ò' */
+    UNICODE_SMALL_O_ACUTE                   = 0xf3, /* 'ó' */
+    UNICODE_SMALL_O_CIRCUMFLEX              = 0xf4, /* 'ô' */
+    UNICODE_SMALL_O_DIARESIS                = 0xf6, /* 'ö' */
+    UNICODE_SMALL_U_GRAVE                   = 0xf9, /* 'ù' */
+    UNICODE_SMALL_U_ACUTE                   = 0xfa, /* 'ú' */
+    UNICODE_SMALL_U_CIRCUMFLEX              = 0xfb, /* 'û' */
+    UNICODE_SMALL_U_DIARESIS                = 0xfc, /* 'ü' */
+
+    UNICODE_NUMERO_SIGN                     = 0x2116, /* '№' */
+    UNICODE_TRADE_MARK                      = 0x2122, /* '™' */
+    UNICODE_INCREMENT                       = 0x2206, /* '∆' */
+    UNICODE_BLACK_UP_POINTING_TRIANGLE      = 0x25b2, /* '▲' */
+    UNICODE_BLACK_RIGHT_POINTING_POINTER    = 0x25ba, /* '►' */
+    UNICODE_BLACK_DOWN_POINTING_POINTER     = 0x25bc, /* '▼' */
+    UNICODE_BLACK_LEFT_POINTING_POINTER     = 0x25c4, /* '◄' */
+    UNICODE_WHITE_CIRCLE                    = 0x25cb, /* '○' */
+    UNICODE_BLACK_STAR                      = 0x2605, /* '★' */
+    UNICODE_BLACK_HEART_SUIT                = 0x2665, /* '♥' */
+    UNICODE_CROSS_MARK                      = 0x274c, /* '❌' */
+    UNICODE_REPLACEMENT_CHARACTER           = 0xfffd, /* '<27>' */
+
+    UNICODE_SHIELD                          = 0x1f6e1, /* '🛡' */
+};
+
+#endif
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -31,6 +31,7 @@ set(TESTS
    SaveGame
    ScriptMachine
    State
+    StringEncoding
    Text
    TrafficDirector
    Vehicle
--- a/tests/test_StringEncoding.cpp
+++ b/tests/test_StringEncoding.cpp
@ -0,0 +1,200 @@
+#include <boost/test/unit_test.hpp>
+#include <fonts/FontMapGta3.hpp>
+#include <fonts/Unicode.hpp>
+#include <iostream>
+#include <vector>
+
+BOOST_TEST_DONT_PRINT_LOG_VALUE(GameString)
+
+/**
+ * All tests about changing string encodings.
+ */
+BOOST_AUTO_TEST_SUITE(StringEncodingTests)
+
+/**
+ * Encoding U+003F is expected to yield the single byte 0x3f.
+ */
+BOOST_AUTO_TEST_CASE(unicode_to_char_1char) {
+    unicode_t codepoint = 0x3f; /* QUESTION MARK */
+    char encoded[4];
+    const auto written = unicode_to_utf8(codepoint, encoded);
+
+    BOOST_CHECK_EQUAL(written, 1);
+
+    /* Only the bytes reported as written take part in the comparison. */
+    const char expected[1] = {0x3f};
+    BOOST_CHECK_EQUAL_COLLECTIONS(encoded, encoded + written, expected,
+                                  expected + written);
+}
+
+/**
+ * Encoding U+00E6 is expected to yield the two bytes 0xc3 0xa6.
+ */
+BOOST_AUTO_TEST_CASE(unicode_to_char_2char) {
+    unicode_t codepoint = 0x00e6; /* LATIN SMALL LETTER AE */
+    char encoded[4];
+    const auto written = unicode_to_utf8(codepoint, encoded);
+
+    BOOST_CHECK_EQUAL(written, 2);
+
+    /* Only the bytes reported as written take part in the comparison. */
+    const char expected[2] = {char(0xc3), char(0xa6)};
+    BOOST_CHECK_EQUAL_COLLECTIONS(encoded, encoded + written, expected,
+                                  expected + written);
+}
+
+/**
+ * Encoding U+0F45 is expected to yield the three bytes 0xe0 0xbd 0x85.
+ */
+BOOST_AUTO_TEST_CASE(unicode_to_char_3char) {
+    unicode_t codepoint = 0x0f45; /* TIBETAN LETTER CA */
+    char encoded[4];
+    const auto written = unicode_to_utf8(codepoint, encoded);
+
+    BOOST_CHECK_EQUAL(written, 3);
+
+    /* Only the bytes reported as written take part in the comparison. */
+    const char expected[3] = {char(0xe0), char(0xbd), char(0x85)};
+    BOOST_CHECK_EQUAL_COLLECTIONS(encoded, encoded + written, expected,
+                                  expected + written);
+}
+
+/**
+ * Encoding U+10454 is expected to yield the four bytes 0xf0 0x90 0x91 0x94.
+ */
+BOOST_AUTO_TEST_CASE(unicode_to_char_4char) {
+    unicode_t codepoint = 0x10454; /* SHAVIAN LETTER THIGH */
+    char encoded[4];
+    const auto written = unicode_to_utf8(codepoint, encoded);
+
+    BOOST_CHECK_EQUAL(written, 4);
+
+    /* Only the bytes reported as written take part in the comparison. */
+    const char expected[4] = {char(0xf0), char(0x90), char(0x91), char(0x94)};
+    BOOST_CHECK_EQUAL_COLLECTIONS(encoded, encoded + written, expected,
+                                  expected + written);
+}
+
+/**
+ * A value this test treats as illegal (0x124544) is expected to be encoded as
+ * the three UTF-8 bytes of the replacement character (0xef 0xbf 0xbd).
+ */
+BOOST_AUTO_TEST_CASE(unicode_to_char_illegal) {
+    char val[4];
+    unicode_t u = 0x124544; /* Illegal unicode */
+    auto nb = unicode_to_utf8(u, val);
+
+    /* Fatal check: the range comparison below is only meaningful (and only
+     * stays inside val) when the reported byte count is the expected one. */
+    BOOST_REQUIRE_EQUAL(nb, 3);
+
+    /* utf-8 encoding of replacement character; the array is sized to exactly
+     * these bytes so no implicit trailing NUL becomes part of the reference. */
+    const char ref[3] = {char(0xef), char(0xbf), char(0xbd)};
+    BOOST_CHECK_EQUAL_COLLECTIONS(val, val + nb, ref, ref + sizeof(ref));
+}
+
+/**
+ * Feeds a plain ASCII string to Utf8UnicodeIterator and expects one code point
+ * per input byte, followed by the iterator reporting exhaustion.
+ */
+BOOST_AUTO_TEST_CASE(utf8_iterator_simple) {
+    /* Length 12 = the 11 visible characters plus the literal's trailing NUL. */
+    std::string text("Hello World", 12);
+    std::istringstream input(text);
+    Utf8UnicodeIterator it(input);
+
+    BOOST_CHECK_EQUAL(text.size(), 12);
+
+    for (const char expected : text) {
+        BOOST_CHECK(it.good());
+        BOOST_CHECK_EQUAL(it.unicode(), expected);
+        ++it;
+    }
+    BOOST_CHECK(!it.good());
+}
+
+/**
+ * Feeds the bytes 'a', 0xff, 'b', 0xff, 'c' to Utf8UnicodeIterator and expects
+ * each 0xff to be reported as UNICODE_REPLACEMENT_CHARACTER while the
+ * surrounding ASCII characters are reported unchanged.
+ */
+BOOST_AUTO_TEST_CASE(utf8_iterator_invalid) {
+    const unsigned char bytes[] = {'a', 0xff, 'b', 0xff, 'c', 0x00};
+    std::istringstream input(reinterpret_cast<const char *>(bytes));
+    Utf8UnicodeIterator it(input);
+
+    BOOST_CHECK_EQUAL(sizeof(bytes), 6);
+
+    /* Code points expected from the iterator, in order. */
+    const unicode_t expected[] = {
+        'a', UNICODE_REPLACEMENT_CHARACTER,
+        'b', UNICODE_REPLACEMENT_CHARACTER,
+        'c',
+    };
+    for (const unicode_t want : expected) {
+        BOOST_CHECK(it.good());
+        BOOST_CHECK_EQUAL(want, it.unicode());
+        ++it;
+    }
+
+    /* The trailing 0x00 terminates the C string, so nothing is left to read. */
+    BOOST_CHECK(!it.good());
+}
+
+/** Pairs a UTF-8 byte sequence with the code point the tests expect for it. */
+struct utf8_unicode_t {
+    const char *utf8;  /* NUL-terminated UTF-8 bytes */
+    unicode_t unicode; /* matching code point */
+};
+
+/**
+ * Reference table of UTF-8 sequences (one to four bytes long) and their code
+ * points. The final entry, whose code point is 0, terminates the table.
+ */
+const utf8_unicode_t utf_unicode_data[] = {
+    {"\x2e", 0x2e},                /* full stop */
+    {"\x77", 0x77},                /* w */
+    {"\xc3\x97", 0xd7},            /* multiplication sign */
+    {"\xd8\x8c", 0x060c},          /* Arabic comma */
+    {"\xe2\x9b\xb0", 0x26f0},      /* mountain */
+    {"\xe2\x8a\xa8", 0x22a8},      /* true */
+    {"\xf0\x9f\xa7\x9b", 0x1f9db}, /* vampire */
+    {"\xf0\x9f\xa4\x9f", 0x1f91f}, /* I love you hand sign */
+    {"", 0},                       /* sentinel */
+};
+
+/**
+ * Concatenates the UTF-8 sequences of utf_unicode_data in table order, stopping
+ * at (and excluding) the entry whose code point is 0.
+ *
+ * @return the concatenated UTF-8 byte string
+ */
+std::string createUtf8String() {
+    std::string joined;
+    const utf8_unicode_t *entry = utf_unicode_data;
+    while (entry->unicode) {
+        joined += entry->utf8;
+        ++entry;
+    }
+    return joined;
+}
+
+/**
+ * Decodes the concatenation of every sequence in utf_unicode_data with
+ * Utf8UnicodeIterator and expects the table's code points back, in order,
+ * followed by the iterator reporting exhaustion.
+ */
+BOOST_AUTO_TEST_CASE(utf8_iterator_mixed) {
+    std::string str = createUtf8String();
+    std::istringstream iss(str);
+    Utf8UnicodeIterator it(iss);
+
+    /* The table is walked up to its sentinel (code point 0). */
+    for (const utf8_unicode_t *uu = utf_unicode_data; uu->unicode; ++uu) {
+        BOOST_CHECK(it.good());
+        BOOST_CHECK_EQUAL(it.unicode(), uu->unicode);
+        ++it;
+    }
+    BOOST_CHECK(!it.good());
+}
+
+/**
+ * Iterates the concatenated reference string through a range-based for loop
+ * over Utf8UnicodeIteratorWrapper and expects the code points of
+ * utf_unicode_data, in order. A stream-based Utf8UnicodeIterator over the same
+ * string is advanced in lock-step as a cross-check on the element count.
+ */
+BOOST_AUTO_TEST_CASE(utf8_iterator_ranged_for_loop) {
+    std::string str = createUtf8String();
+    std::istringstream iss(str);
+    Utf8UnicodeIterator it(iss);
+
+    const utf8_unicode_t *uu = utf_unicode_data;
+    for (unicode_t u : Utf8UnicodeIteratorWrapper(str)) {
+        /* Fatal check: never step past the sentinel if the wrapper yields
+         * more elements than the table holds. */
+        BOOST_REQUIRE(uu->unicode != 0);
+        BOOST_CHECK_EQUAL(u, uu->unicode);
+        ++it;
+        ++uu;
+    }
+    /* The loop must have consumed the whole table, not just a prefix. */
+    BOOST_CHECK(uu->unicode == 0);
+    BOOST_CHECK(!it.good());
+}
+
+/**
+ * For every font map in maps_gta3_font, the character 'x' is expected to
+ * convert to the same value in both directions.
+ */
+BOOST_AUTO_TEST_CASE(GameStringChar_simple) {
+    for (const auto &font : maps_gta3_font) {
+        auto asGameChar = font.to_GameStringChar('x');
+        BOOST_CHECK_EQUAL(asGameChar, GameStringChar('x'));
+
+        auto asUnicode = font.to_unicode('x');
+        BOOST_CHECK_EQUAL(asUnicode, unicode_t('x'));
+    }
+}
+
+/**
+ * For every font map in maps_gta3_font, converting "Hello world" is expected
+ * to keep the length and to map each character to the GameStringChar of the
+ * same value.
+ */
+BOOST_AUTO_TEST_CASE(GameString_simple) {
+    std::string text = "Hello world";
+    for (const auto &font : maps_gta3_font) {
+        auto converted = font.to_GameString(text);
+        BOOST_CHECK_EQUAL(text.size(), converted.length());
+
+        size_t pos = 0;
+        for (const char ch : text) {
+            BOOST_CHECK_EQUAL(converted[pos], GameStringChar(ch));
+            ++pos;
+        }
+    }
+}
+
+BOOST_AUTO_TEST_SUITE_END()
--- a/tests/test_Text.cpp
+++ b/tests/test_Text.cpp
@ -1,6 +1,6 @@
 #include <boost/test/unit_test.hpp>
-#include <data/GameTexts.hpp>
 #include <engine/ScreenText.hpp>
+#include <fonts/GameTexts.hpp>
 #include <loaders/LoaderGXT.hpp>
 #include "test_Globals.hpp"

@ -23,6 +23,14 @@ BOOST_AUTO_TEST_CASE(load_test) {
    }
 }

+/** A newline passed through T() is expected to stay a single '\n' character. */
+BOOST_AUTO_TEST_CASE(special_chars) {
+    auto lineBreak = T("\n");
+    BOOST_CHECK_EQUAL(lineBreak.size(), 1);
+    BOOST_CHECK_EQUAL(lineBreak[0], '\n');
+}
+
 BOOST_AUTO_TEST_CASE(big_test) {
    // Check that makeBig creates a text in the right place
    {