FTXUI/src/ftxui/screen/string.cpp

601 lines
22 KiB
C++
Raw Normal View History

// Most of this code is borrowed from:
// Markus Kuhn -- 2007-05-26 (Unicode 5.0)
// Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
// Thank you!
//
// Modified by Arthur Sonzogni for FTXUI.
#include "ftxui/screen/string.hpp"
2018-09-18 14:48:40 +08:00
2022-03-31 08:17:43 +08:00
#include <array> // for array
#include <codecvt> // for codecvt_utf8_utf16
#include <cstdint> // for uint32_t, uint8_t
#include <locale> // for wstring_convert
#include <string> // for string, basic_string, wstring
#include "ftxui/screen/deprecated.hpp" // for wchar_width, wstring_width
namespace {
// An inclusive range [first, last] of Unicode codepoints. The lookup tables
// below are arrays of these, searched with Bisearch().
struct Interval {
  uint32_t first;  // Lowest codepoint contained in the range.
  uint32_t last;   // Highest codepoint contained in the range.
};
// Sorted list of non-overlapping intervals of non-spacing characters
// generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c"
// As of Unicode 13.0.0
//
// Each entry is an inclusive [first, last] codepoint range. IsCombining()
// hands this table to Bisearch(), which relies on the entries staying sorted
// in ascending order; codepoint_width() reports a width of 0 for any
// codepoint found here.
const std::array<Interval, 324> g_combining_characters = {
Interval{0x00300, 0x0036f}, Interval{0x00483, 0x00489},
Interval{0x00591, 0x005bd}, Interval{0x005bf, 0x005bf},
Interval{0x005c1, 0x005c2}, Interval{0x005c4, 0x005c5},
Interval{0x005c7, 0x005c7}, Interval{0x00610, 0x0061a},
Interval{0x0064b, 0x0065f}, Interval{0x00670, 0x00670},
Interval{0x006d6, 0x006dc}, Interval{0x006df, 0x006e4},
Interval{0x006e7, 0x006e8}, Interval{0x006ea, 0x006ed},
Interval{0x00711, 0x00711}, Interval{0x00730, 0x0074a},
Interval{0x007a6, 0x007b0}, Interval{0x007eb, 0x007f3},
Interval{0x007fd, 0x007fd}, Interval{0x00816, 0x00819},
Interval{0x0081b, 0x00823}, Interval{0x00825, 0x00827},
Interval{0x00829, 0x0082d}, Interval{0x00859, 0x0085b},
Interval{0x008d3, 0x008e1}, Interval{0x008e3, 0x00902},
Interval{0x0093a, 0x0093a}, Interval{0x0093c, 0x0093c},
Interval{0x00941, 0x00948}, Interval{0x0094d, 0x0094d},
Interval{0x00951, 0x00957}, Interval{0x00962, 0x00963},
Interval{0x00981, 0x00981}, Interval{0x009bc, 0x009bc},
Interval{0x009c1, 0x009c4}, Interval{0x009cd, 0x009cd},
Interval{0x009e2, 0x009e3}, Interval{0x009fe, 0x009fe},
Interval{0x00a01, 0x00a02}, Interval{0x00a3c, 0x00a3c},
Interval{0x00a41, 0x00a42}, Interval{0x00a47, 0x00a48},
Interval{0x00a4b, 0x00a4d}, Interval{0x00a51, 0x00a51},
Interval{0x00a70, 0x00a71}, Interval{0x00a75, 0x00a75},
Interval{0x00a81, 0x00a82}, Interval{0x00abc, 0x00abc},
Interval{0x00ac1, 0x00ac5}, Interval{0x00ac7, 0x00ac8},
Interval{0x00acd, 0x00acd}, Interval{0x00ae2, 0x00ae3},
Interval{0x00afa, 0x00aff}, Interval{0x00b01, 0x00b01},
Interval{0x00b3c, 0x00b3c}, Interval{0x00b3f, 0x00b3f},
Interval{0x00b41, 0x00b44}, Interval{0x00b4d, 0x00b4d},
Interval{0x00b55, 0x00b56}, Interval{0x00b62, 0x00b63},
Interval{0x00b82, 0x00b82}, Interval{0x00bc0, 0x00bc0},
Interval{0x00bcd, 0x00bcd}, Interval{0x00c00, 0x00c00},
Interval{0x00c04, 0x00c04}, Interval{0x00c3e, 0x00c40},
Interval{0x00c46, 0x00c48}, Interval{0x00c4a, 0x00c4d},
Interval{0x00c55, 0x00c56}, Interval{0x00c62, 0x00c63},
Interval{0x00c81, 0x00c81}, Interval{0x00cbc, 0x00cbc},
Interval{0x00cbf, 0x00cbf}, Interval{0x00cc6, 0x00cc6},
Interval{0x00ccc, 0x00ccd}, Interval{0x00ce2, 0x00ce3},
Interval{0x00d00, 0x00d01}, Interval{0x00d3b, 0x00d3c},
Interval{0x00d41, 0x00d44}, Interval{0x00d4d, 0x00d4d},
Interval{0x00d62, 0x00d63}, Interval{0x00d81, 0x00d81},
Interval{0x00dca, 0x00dca}, Interval{0x00dd2, 0x00dd4},
Interval{0x00dd6, 0x00dd6}, Interval{0x00e31, 0x00e31},
Interval{0x00e34, 0x00e3a}, Interval{0x00e47, 0x00e4e},
Interval{0x00eb1, 0x00eb1}, Interval{0x00eb4, 0x00ebc},
Interval{0x00ec8, 0x00ecd}, Interval{0x00f18, 0x00f19},
Interval{0x00f35, 0x00f35}, Interval{0x00f37, 0x00f37},
Interval{0x00f39, 0x00f39}, Interval{0x00f71, 0x00f7e},
Interval{0x00f80, 0x00f84}, Interval{0x00f86, 0x00f87},
Interval{0x00f8d, 0x00f97}, Interval{0x00f99, 0x00fbc},
Interval{0x00fc6, 0x00fc6}, Interval{0x0102d, 0x01030},
Interval{0x01032, 0x01037}, Interval{0x01039, 0x0103a},
Interval{0x0103d, 0x0103e}, Interval{0x01058, 0x01059},
Interval{0x0105e, 0x01060}, Interval{0x01071, 0x01074},
Interval{0x01082, 0x01082}, Interval{0x01085, 0x01086},
Interval{0x0108d, 0x0108d}, Interval{0x0109d, 0x0109d},
Interval{0x0135d, 0x0135f}, Interval{0x01712, 0x01714},
Interval{0x01732, 0x01734}, Interval{0x01752, 0x01753},
Interval{0x01772, 0x01773}, Interval{0x017b4, 0x017b5},
Interval{0x017b7, 0x017bd}, Interval{0x017c6, 0x017c6},
Interval{0x017c9, 0x017d3}, Interval{0x017dd, 0x017dd},
Interval{0x0180b, 0x0180d}, Interval{0x01885, 0x01886},
Interval{0x018a9, 0x018a9}, Interval{0x01920, 0x01922},
Interval{0x01927, 0x01928}, Interval{0x01932, 0x01932},
Interval{0x01939, 0x0193b}, Interval{0x01a17, 0x01a18},
Interval{0x01a1b, 0x01a1b}, Interval{0x01a56, 0x01a56},
Interval{0x01a58, 0x01a5e}, Interval{0x01a60, 0x01a60},
Interval{0x01a62, 0x01a62}, Interval{0x01a65, 0x01a6c},
Interval{0x01a73, 0x01a7c}, Interval{0x01a7f, 0x01a7f},
Interval{0x01ab0, 0x01ac0}, Interval{0x01b00, 0x01b03},
Interval{0x01b34, 0x01b34}, Interval{0x01b36, 0x01b3a},
Interval{0x01b3c, 0x01b3c}, Interval{0x01b42, 0x01b42},
Interval{0x01b6b, 0x01b73}, Interval{0x01b80, 0x01b81},
Interval{0x01ba2, 0x01ba5}, Interval{0x01ba8, 0x01ba9},
Interval{0x01bab, 0x01bad}, Interval{0x01be6, 0x01be6},
Interval{0x01be8, 0x01be9}, Interval{0x01bed, 0x01bed},
Interval{0x01bef, 0x01bf1}, Interval{0x01c2c, 0x01c33},
Interval{0x01c36, 0x01c37}, Interval{0x01cd0, 0x01cd2},
Interval{0x01cd4, 0x01ce0}, Interval{0x01ce2, 0x01ce8},
Interval{0x01ced, 0x01ced}, Interval{0x01cf4, 0x01cf4},
Interval{0x01cf8, 0x01cf9}, Interval{0x01dc0, 0x01df9},
Interval{0x01dfb, 0x01dff}, Interval{0x020d0, 0x020f0},
Interval{0x02cef, 0x02cf1}, Interval{0x02d7f, 0x02d7f},
Interval{0x02de0, 0x02dff}, Interval{0x0302a, 0x0302d},
Interval{0x03099, 0x0309a}, Interval{0x0a66f, 0x0a672},
Interval{0x0a674, 0x0a67d}, Interval{0x0a69e, 0x0a69f},
Interval{0x0a6f0, 0x0a6f1}, Interval{0x0a802, 0x0a802},
Interval{0x0a806, 0x0a806}, Interval{0x0a80b, 0x0a80b},
Interval{0x0a825, 0x0a826}, Interval{0x0a82c, 0x0a82c},
Interval{0x0a8c4, 0x0a8c5}, Interval{0x0a8e0, 0x0a8f1},
Interval{0x0a8ff, 0x0a8ff}, Interval{0x0a926, 0x0a92d},
Interval{0x0a947, 0x0a951}, Interval{0x0a980, 0x0a982},
Interval{0x0a9b3, 0x0a9b3}, Interval{0x0a9b6, 0x0a9b9},
Interval{0x0a9bc, 0x0a9bd}, Interval{0x0a9e5, 0x0a9e5},
Interval{0x0aa29, 0x0aa2e}, Interval{0x0aa31, 0x0aa32},
Interval{0x0aa35, 0x0aa36}, Interval{0x0aa43, 0x0aa43},
Interval{0x0aa4c, 0x0aa4c}, Interval{0x0aa7c, 0x0aa7c},
Interval{0x0aab0, 0x0aab0}, Interval{0x0aab2, 0x0aab4},
Interval{0x0aab7, 0x0aab8}, Interval{0x0aabe, 0x0aabf},
Interval{0x0aac1, 0x0aac1}, Interval{0x0aaec, 0x0aaed},
Interval{0x0aaf6, 0x0aaf6}, Interval{0x0abe5, 0x0abe5},
Interval{0x0abe8, 0x0abe8}, Interval{0x0abed, 0x0abed},
Interval{0x0fb1e, 0x0fb1e}, Interval{0x0fe00, 0x0fe0f},
Interval{0x0fe20, 0x0fe2f}, Interval{0x101fd, 0x101fd},
Interval{0x102e0, 0x102e0}, Interval{0x10376, 0x1037a},
Interval{0x10a01, 0x10a03}, Interval{0x10a05, 0x10a06},
Interval{0x10a0c, 0x10a0f}, Interval{0x10a38, 0x10a3a},
Interval{0x10a3f, 0x10a3f}, Interval{0x10ae5, 0x10ae6},
Interval{0x10d24, 0x10d27}, Interval{0x10eab, 0x10eac},
Interval{0x10f46, 0x10f50}, Interval{0x11001, 0x11001},
Interval{0x11038, 0x11046}, Interval{0x1107f, 0x11081},
Interval{0x110b3, 0x110b6}, Interval{0x110b9, 0x110ba},
Interval{0x11100, 0x11102}, Interval{0x11127, 0x1112b},
Interval{0x1112d, 0x11134}, Interval{0x11173, 0x11173},
Interval{0x11180, 0x11181}, Interval{0x111b6, 0x111be},
Interval{0x111c9, 0x111cc}, Interval{0x111cf, 0x111cf},
Interval{0x1122f, 0x11231}, Interval{0x11234, 0x11234},
Interval{0x11236, 0x11237}, Interval{0x1123e, 0x1123e},
Interval{0x112df, 0x112df}, Interval{0x112e3, 0x112ea},
Interval{0x11300, 0x11301}, Interval{0x1133b, 0x1133c},
Interval{0x11340, 0x11340}, Interval{0x11366, 0x1136c},
Interval{0x11370, 0x11374}, Interval{0x11438, 0x1143f},
Interval{0x11442, 0x11444}, Interval{0x11446, 0x11446},
Interval{0x1145e, 0x1145e}, Interval{0x114b3, 0x114b8},
Interval{0x114ba, 0x114ba}, Interval{0x114bf, 0x114c0},
Interval{0x114c2, 0x114c3}, Interval{0x115b2, 0x115b5},
Interval{0x115bc, 0x115bd}, Interval{0x115bf, 0x115c0},
Interval{0x115dc, 0x115dd}, Interval{0x11633, 0x1163a},
Interval{0x1163d, 0x1163d}, Interval{0x1163f, 0x11640},
Interval{0x116ab, 0x116ab}, Interval{0x116ad, 0x116ad},
Interval{0x116b0, 0x116b5}, Interval{0x116b7, 0x116b7},
Interval{0x1171d, 0x1171f}, Interval{0x11722, 0x11725},
Interval{0x11727, 0x1172b}, Interval{0x1182f, 0x11837},
Interval{0x11839, 0x1183a}, Interval{0x1193b, 0x1193c},
Interval{0x1193e, 0x1193e}, Interval{0x11943, 0x11943},
Interval{0x119d4, 0x119d7}, Interval{0x119da, 0x119db},
Interval{0x119e0, 0x119e0}, Interval{0x11a01, 0x11a0a},
Interval{0x11a33, 0x11a38}, Interval{0x11a3b, 0x11a3e},
Interval{0x11a47, 0x11a47}, Interval{0x11a51, 0x11a56},
Interval{0x11a59, 0x11a5b}, Interval{0x11a8a, 0x11a96},
Interval{0x11a98, 0x11a99}, Interval{0x11c30, 0x11c36},
Interval{0x11c38, 0x11c3d}, Interval{0x11c3f, 0x11c3f},
Interval{0x11c92, 0x11ca7}, Interval{0x11caa, 0x11cb0},
Interval{0x11cb2, 0x11cb3}, Interval{0x11cb5, 0x11cb6},
Interval{0x11d31, 0x11d36}, Interval{0x11d3a, 0x11d3a},
Interval{0x11d3c, 0x11d3d}, Interval{0x11d3f, 0x11d45},
Interval{0x11d47, 0x11d47}, Interval{0x11d90, 0x11d91},
Interval{0x11d95, 0x11d95}, Interval{0x11d97, 0x11d97},
Interval{0x11ef3, 0x11ef4}, Interval{0x16af0, 0x16af4},
Interval{0x16b30, 0x16b36}, Interval{0x16f4f, 0x16f4f},
Interval{0x16f8f, 0x16f92}, Interval{0x16fe4, 0x16fe4},
Interval{0x1bc9d, 0x1bc9e}, Interval{0x1d167, 0x1d169},
Interval{0x1d17b, 0x1d182}, Interval{0x1d185, 0x1d18b},
Interval{0x1d1aa, 0x1d1ad}, Interval{0x1d242, 0x1d244},
Interval{0x1da00, 0x1da36}, Interval{0x1da3b, 0x1da6c},
Interval{0x1da75, 0x1da75}, Interval{0x1da84, 0x1da84},
Interval{0x1da9b, 0x1da9f}, Interval{0x1daa1, 0x1daaf},
Interval{0x1e000, 0x1e006}, Interval{0x1e008, 0x1e018},
Interval{0x1e01b, 0x1e021}, Interval{0x1e023, 0x1e024},
Interval{0x1e026, 0x1e02a}, Interval{0x1e130, 0x1e136},
Interval{0x1e2ec, 0x1e2ef}, Interval{0x1e8d0, 0x1e8d6},
Interval{0x1e944, 0x1e94a}, Interval{0xe0100, 0xe01ef},
};
// Sorted list of non-overlapping inclusive [first, last] codepoint intervals
// of characters that occupy two cells (codepoint_width() returns 2 for them).
// As of Unicode 13.0.0
//
// IsFullWidth() hands this table to Bisearch(), which relies on the entries
// staying sorted in ascending order. A few codepoints (e.g. 0x3099-0x309a)
// also appear in g_combining_characters; the callers in this file test
// IsCombining() first, so those are treated as combining.
const std::array<Interval, 116> g_full_width_characters = {
Interval{0x01100, 0x0115f}, Interval{0x0231a, 0x0231b},
Interval{0x02329, 0x0232a}, Interval{0x023e9, 0x023ec},
Interval{0x023f0, 0x023f0}, Interval{0x023f3, 0x023f3},
Interval{0x025fd, 0x025fe}, Interval{0x02614, 0x02615},
Interval{0x02648, 0x02653}, Interval{0x0267f, 0x0267f},
Interval{0x02693, 0x02693}, Interval{0x026a1, 0x026a1},
Interval{0x026aa, 0x026ab}, Interval{0x026bd, 0x026be},
Interval{0x026c4, 0x026c5}, Interval{0x026ce, 0x026ce},
Interval{0x026d4, 0x026d4}, Interval{0x026ea, 0x026ea},
Interval{0x026f2, 0x026f3}, Interval{0x026f5, 0x026f5},
Interval{0x026fa, 0x026fa}, Interval{0x026fd, 0x026fd},
Interval{0x02705, 0x02705}, Interval{0x0270a, 0x0270b},
Interval{0x02728, 0x02728}, Interval{0x0274c, 0x0274c},
Interval{0x0274e, 0x0274e}, Interval{0x02753, 0x02755},
Interval{0x02757, 0x02757}, Interval{0x02795, 0x02797},
Interval{0x027b0, 0x027b0}, Interval{0x027bf, 0x027bf},
Interval{0x02b1b, 0x02b1c}, Interval{0x02b50, 0x02b50},
Interval{0x02b55, 0x02b55}, Interval{0x02e80, 0x02e99},
Interval{0x02e9b, 0x02ef3}, Interval{0x02f00, 0x02fd5},
Interval{0x02ff0, 0x02ffb}, Interval{0x03000, 0x0303e},
Interval{0x03041, 0x03096}, Interval{0x03099, 0x030ff},
Interval{0x03105, 0x0312f}, Interval{0x03131, 0x0318e},
Interval{0x03190, 0x031e3}, Interval{0x031f0, 0x0321e},
Interval{0x03220, 0x03247}, Interval{0x03250, 0x04dbf},
Interval{0x04e00, 0x0a48c}, Interval{0x0a490, 0x0a4c6},
Interval{0x0a960, 0x0a97c}, Interval{0x0ac00, 0x0d7a3},
Interval{0x0f900, 0x0faff}, Interval{0x0fe10, 0x0fe19},
Interval{0x0fe30, 0x0fe52}, Interval{0x0fe54, 0x0fe66},
Interval{0x0fe68, 0x0fe6b}, Interval{0x0ff01, 0x0ff60},
Interval{0x0ffe0, 0x0ffe6}, Interval{0x16fe0, 0x16fe4},
Interval{0x16ff0, 0x16ff1}, Interval{0x17000, 0x187f7},
Interval{0x18800, 0x18cd5}, Interval{0x18d00, 0x18d08},
Interval{0x1b000, 0x1b11e}, Interval{0x1b150, 0x1b152},
Interval{0x1b164, 0x1b167}, Interval{0x1b170, 0x1b2fb},
Interval{0x1f004, 0x1f004}, Interval{0x1f0cf, 0x1f0cf},
Interval{0x1f18e, 0x1f18e}, Interval{0x1f191, 0x1f19a},
Interval{0x1f200, 0x1f202}, Interval{0x1f210, 0x1f23b},
Interval{0x1f240, 0x1f248}, Interval{0x1f250, 0x1f251},
Interval{0x1f260, 0x1f265}, Interval{0x1f300, 0x1f320},
Interval{0x1f32d, 0x1f335}, Interval{0x1f337, 0x1f37c},
Interval{0x1f37e, 0x1f393}, Interval{0x1f3a0, 0x1f3ca},
Interval{0x1f3cf, 0x1f3d3}, Interval{0x1f3e0, 0x1f3f0},
Interval{0x1f3f4, 0x1f3f4}, Interval{0x1f3f8, 0x1f43e},
Interval{0x1f440, 0x1f440}, Interval{0x1f442, 0x1f4fc},
Interval{0x1f4ff, 0x1f53d}, Interval{0x1f54b, 0x1f54e},
Interval{0x1f550, 0x1f567}, Interval{0x1f57a, 0x1f57a},
Interval{0x1f595, 0x1f596}, Interval{0x1f5a4, 0x1f5a4},
Interval{0x1f5fb, 0x1f64f}, Interval{0x1f680, 0x1f6c5},
Interval{0x1f6cc, 0x1f6cc}, Interval{0x1f6d0, 0x1f6d2},
Interval{0x1f6d5, 0x1f6d7}, Interval{0x1f6eb, 0x1f6ec},
Interval{0x1f6f4, 0x1f6fc}, Interval{0x1f7e0, 0x1f7eb},
Interval{0x1f90c, 0x1f93a}, Interval{0x1f93c, 0x1f945},
Interval{0x1f947, 0x1f978}, Interval{0x1f97a, 0x1f9cb},
Interval{0x1f9cd, 0x1f9ff}, Interval{0x1fa70, 0x1fa74},
Interval{0x1fa78, 0x1fa7a}, Interval{0x1fa80, 0x1fa86},
Interval{0x1fa90, 0x1faa8}, Interval{0x1fab0, 0x1fab6},
Interval{0x1fac0, 0x1fac2}, Interval{0x1fad0, 0x1fad6},
Interval{0x20000, 0x2fffd}, Interval{0x30000, 0x3fffd},
};
// Find a codepoint inside a sorted list of Interval.
2022-03-31 08:17:43 +08:00
bool Bisearch(uint32_t ucs, const Interval* table, int max) {
if (ucs < table[0].first || ucs > table[max].last) { // NOLINT
return false;
}
int min = 0;
while (max >= min) {
int mid = (min + max) / 2;
2022-03-31 08:17:43 +08:00
if (ucs > table[mid].last) { // NOLINT
min = mid + 1;
2022-03-31 08:17:43 +08:00
} else if (ucs < table[mid].first) { // NOLINT
max = mid - 1;
2022-03-31 08:17:43 +08:00
} else {
return true;
}
}
2022-03-31 08:17:43 +08:00
return false;
}
bool IsCombining(uint32_t ucs) {
2022-03-31 08:17:43 +08:00
return Bisearch(ucs, g_combining_characters.data(),
g_combining_characters.size() - 1);
}
bool IsFullWidth(uint32_t ucs) {
2022-03-31 08:17:43 +08:00
if (ucs < 0x0300) // Quick path: // NOLINT
return false;
2022-03-31 08:17:43 +08:00
return Bisearch(ucs, g_full_width_characters.data(),
g_full_width_characters.size() - 1);
}
// Whether |ucs| is a control codepoint: anything below 32 (NUL included), or
// anything in the range [0x7f, 0xa0).
bool IsControl(uint32_t ucs) {
  const bool below_space = ucs < 32;                     // NOLINT
  const bool delete_to_0x9f = ucs >= 0x7f && ucs < 0xa0;  // NOLINT
  return below_space || delete_to_0x9f;
}
int codepoint_width(uint32_t ucs) {
2022-03-31 08:17:43 +08:00
if (IsControl(ucs)) {
return -1;
2022-03-31 08:17:43 +08:00
}
2022-03-31 08:17:43 +08:00
if (IsCombining(ucs)) {
return 0;
2022-03-31 08:17:43 +08:00
}
2022-03-31 08:17:43 +08:00
if (IsFullWidth(ucs)) {
return 2;
2022-03-31 08:17:43 +08:00
}
return 1;
}
// From UTF8 encoded string |input|, eat in between 1 and 4 bytes representing
// one codepoint, starting at byte offset |start|.
//
// On success, returns true, stores the decoded codepoint into |ucs| and sets
// |end| to the offset of the first byte following the sequence.
//
// On failure, returns false, leaves |ucs| untouched and sets |end| to
// |start| + 1, so that callers looping on |end| skip exactly one byte and
// resynchronize on the next one. Failure happens when:
//  - |start| is past the end of |input|,
//  - the first byte is not a valid leading byte,
//  - the sequence is truncated by the end of |input|,
//  - one of the trailing bytes is not a continuation byte (10xxxxxx). Without
//    this check, a stray leading byte would swallow the valid characters
//    following it and produce a bogus codepoint.
//
// |end| may point to the caller's own |start| variable: |start| is taken by
// value.
bool EatCodePoint(const std::string& input,
                  size_t start,
                  size_t* end,
                  uint32_t* ucs) {
  if (start >= input.size()) {
    *end = start + 1;
    return false;
  }

  const auto lead = static_cast<uint8_t>(input[start]);

  // 1 byte string.
  if ((lead & 0b1000'0000) == 0b0000'0000) {  // NOLINT
    *ucs = lead & 0b0111'1111;                // NOLINT
    *end = start + 1;
    return true;
  }

  // Deduce the sequence length and the payload bits carried by the leading
  // byte. |length| stays 0 when |lead| is not a valid leading byte.
  size_t length = 0;
  uint32_t codepoint = 0;
  if ((lead & 0b1110'0000) == 0b1100'0000) {  // NOLINT
    // 2 byte string.
    length = 2;
    codepoint = lead & 0b0001'1111;                  // NOLINT
  } else if ((lead & 0b1111'0000) == 0b1110'0000) {  // NOLINT
    // 3 byte string.
    length = 3;
    codepoint = lead & 0b0000'1111;                  // NOLINT
  } else if ((lead & 0b1111'1000) == 0b1111'0000) {  // NOLINT
    // 4 byte string.
    length = 4;
    codepoint = lead & 0b0000'0111;  // NOLINT
  }

  // Invalid leading byte, or sequence truncated by the end of the input.
  if (length == 0 || start + length > input.size()) {
    *end = start + 1;
    return false;
  }

  // Accumulate 6 payload bits from each trailing byte, refusing anything
  // which is not a continuation byte.
  for (size_t i = 1; i < length; ++i) {
    const auto trail = static_cast<uint8_t>(input[start + i]);
    if ((trail & 0b1100'0000) != 0b1000'0000) {  // NOLINT
      *end = start + 1;
      return false;
    }
    codepoint = (codepoint << 6) | (trail & 0b0011'1111);  // NOLINT
  }

  *ucs = codepoint;
  *end = start + length;
  return true;
}
} // namespace
2018-09-18 14:48:40 +08:00
2020-08-09 20:53:56 +08:00
namespace ftxui {
// Number of cells needed to display the wide character |ucs|, as computed by
// codepoint_width(): -1 for control characters, 0 for combining characters,
// 2 for full-width characters, 1 otherwise.
int wchar_width(wchar_t ucs) {
  const auto codepoint = static_cast<uint32_t>(ucs);
  return codepoint_width(codepoint);
}
// Sum of the widths, in cells, of every character of |text|. Returns -1 as
// soon as one character has a negative width (i.e. is a control character).
int wstring_width(const std::wstring& text) {
  int total = 0;
  for (const wchar_t character : text) {
    const int cells = wchar_width(character);
    if (cells < 0) {
      return -1;
    }
    total += cells;
  }
  return total;
}
int string_width(const std::string& input) {
int width = 0;
size_t start = 0;
while (start < input.size()) {
uint32_t codepoint = 0;
2022-03-31 08:17:43 +08:00
if (!EatCodePoint(input, start, &start, &codepoint)) {
continue;
2022-03-31 08:17:43 +08:00
}
2022-03-31 08:17:43 +08:00
if (IsControl(codepoint)) {
continue;
2022-03-31 08:17:43 +08:00
}
2022-03-31 08:17:43 +08:00
if (IsCombining(codepoint)) {
continue;
2022-03-31 08:17:43 +08:00
}
if (IsFullWidth(codepoint)) {
width += 2;
continue;
}
width += 1;
}
return width;
}
std::vector<std::string> Utf8ToGlyphs(const std::string& input) {
std::vector<std::string> out;
std::string current;
out.reserve(input.size());
size_t start = 0;
size_t end = 0;
while (start < input.size()) {
2022-03-31 08:17:43 +08:00
uint32_t codepoint = 0;
if (!EatCodePoint(input, start, &end, &codepoint)) {
start = end;
continue;
}
std::string append = input.substr(start, end - start);
start = end;
// Ignore control characters.
2022-03-31 08:17:43 +08:00
if (IsControl(codepoint)) {
continue;
2022-03-31 08:17:43 +08:00
}
// Combining characters are put with the previous glyph they are modifying.
if (IsCombining(codepoint)) {
2022-03-31 08:17:43 +08:00
if (!out.empty()) {
out.back() += append;
2022-03-31 08:17:43 +08:00
}
continue;
}
// Fullwidth characters take two cells. The second is made of the empty
// string to reserve the space the first is taking.
if (IsFullWidth(codepoint)) {
out.push_back(append);
2022-03-31 08:17:43 +08:00
out.emplace_back("");
continue;
}
// Normal characters:
out.push_back(append);
}
return out;
}
2022-03-31 08:17:43 +08:00
int GlyphPosition(const std::string& input, size_t glyph_index, size_t start) {
if (glyph_index <= 0) {
return 0;
2022-03-31 08:17:43 +08:00
}
size_t end = 0;
while (start < input.size()) {
2022-03-31 08:17:43 +08:00
uint32_t codepoint = 0;
bool eaten = EatCodePoint(input, start, &end, &codepoint);
// Ignore invalid, control characters and combining characters.
if (!eaten || IsControl(codepoint) || IsCombining(codepoint)) {
start = end;
continue;
}
// We eat the beginning of the next glyph. If we are eating the one
// requested, return its start position immediately.
2022-03-31 08:17:43 +08:00
if (glyph_index == 0) {
return static_cast<int>(start);
}
// Otherwise, skip this glyph and iterate:
2022-03-31 08:17:43 +08:00
glyph_index--;
start = end;
}
2022-03-31 08:17:43 +08:00
return static_cast<int>(input.size());
}
std::vector<int> CellToGlyphIndex(const std::string& input) {
int x = -1;
std::vector<int> out;
out.reserve(input.size());
size_t start = 0;
size_t end = 0;
while (start < input.size()) {
2022-03-31 08:17:43 +08:00
uint32_t codepoint = 0;
bool eaten = EatCodePoint(input, start, &end, &codepoint);
start = end;
// Ignore invalid / control characters.
2022-03-31 08:17:43 +08:00
if (!eaten || IsControl(codepoint)) {
continue;
2022-03-31 08:17:43 +08:00
}
// Combining characters are put with the previous glyph they are modifying.
if (IsCombining(codepoint)) {
if (x == -1) {
++x;
out.push_back(x);
}
continue;
}
// Fullwidth characters take two cells. The second is made of the empty
// string to reserve the space the first is taking.
if (IsFullWidth(codepoint)) {
++x;
out.push_back(x);
out.push_back(x);
continue;
}
// Normal characters:
++x;
out.push_back(x);
}
return out;
}
int GlyphCount(const std::string& input) {
int size = 0;
size_t start = 0;
size_t end = 0;
while (start < input.size()) {
2022-03-31 08:17:43 +08:00
uint32_t codepoint = 0;
bool eaten = EatCodePoint(input, start, &end, &codepoint);
start = end;
// Ignore invalid characters:
2022-03-31 08:17:43 +08:00
if (!eaten || IsControl(codepoint)) {
continue;
2022-03-31 08:17:43 +08:00
}
// Ignore combining characters, except when they don't have a preceding to
// combine with.
if (IsCombining(codepoint)) {
2022-03-31 08:17:43 +08:00
if (size == 0) {
size++;
2022-03-31 08:17:43 +08:00
}
continue;
}
size++;
}
return size;
}
2020-03-23 14:12:06 +08:00
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4996) // codecvt_utf8_utf16 is deprecated
2020-03-23 14:12:06 +08:00
#endif
2020-08-16 08:24:50 +08:00
/// Convert a std::wstring into a UTF8 std::string.
std::string to_string(const std::wstring& s) {
  using Converter = std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>;
  return Converter().to_bytes(s);
}
2020-08-16 08:24:50 +08:00
/// Convert a UTF8 std::string into a std::wstring.
std::wstring to_wstring(const std::string& s) {
  using Converter = std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>;
  return Converter().from_bytes(s);
}
2020-03-23 14:12:06 +08:00
#ifdef _MSC_VER
#pragma warning(pop)
2020-08-09 20:53:56 +08:00
#endif
} // namespace ftxui
// Copyright 2020 Arthur Sonzogni. All rights reserved.
// Use of this source code is governed by the MIT license that can be found in
// the LICENSE file.