2021-08-09 05:25:20 +08:00
|
|
|
// Most of this code is borrowed from:
|
|
|
|
// Markus Kuhn -- 2007-05-26 (Unicode 5.0)
|
|
|
|
// Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
|
|
|
|
// Thank you!
|
|
|
|
//
|
|
|
|
// Modified by Arthur Sonzogni for FTXUI.
|
|
|
|
|
2019-01-21 06:04:10 +08:00
|
|
|
#include "ftxui/screen/string.hpp"
|
2018-09-18 14:48:40 +08:00
|
|
|
|
2022-03-31 08:17:43 +08:00
|
|
|
#include <array> // for array
|
|
|
|
#include <codecvt> // for codecvt_utf8_utf16
|
|
|
|
#include <cstdint> // for uint32_t, uint8_t
|
|
|
|
#include <locale> // for wstring_convert
|
|
|
|
#include <string> // for string, basic_string, wstring
|
2021-08-09 05:25:20 +08:00
|
|
|
|
2021-08-09 06:27:37 +08:00
|
|
|
#include "ftxui/screen/deprecated.hpp" // for wchar_width, wstring_width
|
|
|
|
|
2021-08-09 05:25:20 +08:00
|
|
|
namespace {
|
|
|
|
|
|
|
|
// Closed range [first, last] of Unicode codepoints. Both bounds are inclusive.
struct Interval {
  uint32_t first;  // Lowest codepoint contained in the range.
  uint32_t last;   // Highest codepoint contained in the range.
};
|
|
|
|
|
|
|
|
// Sorted list of non-overlapping intervals of non-spacing characters
|
|
|
|
// generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c"
|
2022-09-29 16:50:27 +08:00
|
|
|
// As of Unicode 13.0.0
|
|
|
|
const std::array<Interval, 324> g_combining_characters = {
|
|
|
|
Interval{0x00300, 0x0036f}, Interval{0x00483, 0x00489},
|
|
|
|
Interval{0x00591, 0x005bd}, Interval{0x005bf, 0x005bf},
|
|
|
|
Interval{0x005c1, 0x005c2}, Interval{0x005c4, 0x005c5},
|
|
|
|
Interval{0x005c7, 0x005c7}, Interval{0x00610, 0x0061a},
|
|
|
|
Interval{0x0064b, 0x0065f}, Interval{0x00670, 0x00670},
|
|
|
|
Interval{0x006d6, 0x006dc}, Interval{0x006df, 0x006e4},
|
|
|
|
Interval{0x006e7, 0x006e8}, Interval{0x006ea, 0x006ed},
|
|
|
|
Interval{0x00711, 0x00711}, Interval{0x00730, 0x0074a},
|
|
|
|
Interval{0x007a6, 0x007b0}, Interval{0x007eb, 0x007f3},
|
|
|
|
Interval{0x007fd, 0x007fd}, Interval{0x00816, 0x00819},
|
|
|
|
Interval{0x0081b, 0x00823}, Interval{0x00825, 0x00827},
|
|
|
|
Interval{0x00829, 0x0082d}, Interval{0x00859, 0x0085b},
|
|
|
|
Interval{0x008d3, 0x008e1}, Interval{0x008e3, 0x00902},
|
|
|
|
Interval{0x0093a, 0x0093a}, Interval{0x0093c, 0x0093c},
|
|
|
|
Interval{0x00941, 0x00948}, Interval{0x0094d, 0x0094d},
|
|
|
|
Interval{0x00951, 0x00957}, Interval{0x00962, 0x00963},
|
|
|
|
Interval{0x00981, 0x00981}, Interval{0x009bc, 0x009bc},
|
|
|
|
Interval{0x009c1, 0x009c4}, Interval{0x009cd, 0x009cd},
|
|
|
|
Interval{0x009e2, 0x009e3}, Interval{0x009fe, 0x009fe},
|
|
|
|
Interval{0x00a01, 0x00a02}, Interval{0x00a3c, 0x00a3c},
|
|
|
|
Interval{0x00a41, 0x00a42}, Interval{0x00a47, 0x00a48},
|
|
|
|
Interval{0x00a4b, 0x00a4d}, Interval{0x00a51, 0x00a51},
|
|
|
|
Interval{0x00a70, 0x00a71}, Interval{0x00a75, 0x00a75},
|
|
|
|
Interval{0x00a81, 0x00a82}, Interval{0x00abc, 0x00abc},
|
|
|
|
Interval{0x00ac1, 0x00ac5}, Interval{0x00ac7, 0x00ac8},
|
|
|
|
Interval{0x00acd, 0x00acd}, Interval{0x00ae2, 0x00ae3},
|
|
|
|
Interval{0x00afa, 0x00aff}, Interval{0x00b01, 0x00b01},
|
|
|
|
Interval{0x00b3c, 0x00b3c}, Interval{0x00b3f, 0x00b3f},
|
|
|
|
Interval{0x00b41, 0x00b44}, Interval{0x00b4d, 0x00b4d},
|
|
|
|
Interval{0x00b55, 0x00b56}, Interval{0x00b62, 0x00b63},
|
|
|
|
Interval{0x00b82, 0x00b82}, Interval{0x00bc0, 0x00bc0},
|
|
|
|
Interval{0x00bcd, 0x00bcd}, Interval{0x00c00, 0x00c00},
|
|
|
|
Interval{0x00c04, 0x00c04}, Interval{0x00c3e, 0x00c40},
|
|
|
|
Interval{0x00c46, 0x00c48}, Interval{0x00c4a, 0x00c4d},
|
|
|
|
Interval{0x00c55, 0x00c56}, Interval{0x00c62, 0x00c63},
|
|
|
|
Interval{0x00c81, 0x00c81}, Interval{0x00cbc, 0x00cbc},
|
|
|
|
Interval{0x00cbf, 0x00cbf}, Interval{0x00cc6, 0x00cc6},
|
|
|
|
Interval{0x00ccc, 0x00ccd}, Interval{0x00ce2, 0x00ce3},
|
|
|
|
Interval{0x00d00, 0x00d01}, Interval{0x00d3b, 0x00d3c},
|
|
|
|
Interval{0x00d41, 0x00d44}, Interval{0x00d4d, 0x00d4d},
|
|
|
|
Interval{0x00d62, 0x00d63}, Interval{0x00d81, 0x00d81},
|
|
|
|
Interval{0x00dca, 0x00dca}, Interval{0x00dd2, 0x00dd4},
|
|
|
|
Interval{0x00dd6, 0x00dd6}, Interval{0x00e31, 0x00e31},
|
|
|
|
Interval{0x00e34, 0x00e3a}, Interval{0x00e47, 0x00e4e},
|
|
|
|
Interval{0x00eb1, 0x00eb1}, Interval{0x00eb4, 0x00ebc},
|
|
|
|
Interval{0x00ec8, 0x00ecd}, Interval{0x00f18, 0x00f19},
|
|
|
|
Interval{0x00f35, 0x00f35}, Interval{0x00f37, 0x00f37},
|
|
|
|
Interval{0x00f39, 0x00f39}, Interval{0x00f71, 0x00f7e},
|
|
|
|
Interval{0x00f80, 0x00f84}, Interval{0x00f86, 0x00f87},
|
|
|
|
Interval{0x00f8d, 0x00f97}, Interval{0x00f99, 0x00fbc},
|
|
|
|
Interval{0x00fc6, 0x00fc6}, Interval{0x0102d, 0x01030},
|
|
|
|
Interval{0x01032, 0x01037}, Interval{0x01039, 0x0103a},
|
|
|
|
Interval{0x0103d, 0x0103e}, Interval{0x01058, 0x01059},
|
|
|
|
Interval{0x0105e, 0x01060}, Interval{0x01071, 0x01074},
|
|
|
|
Interval{0x01082, 0x01082}, Interval{0x01085, 0x01086},
|
|
|
|
Interval{0x0108d, 0x0108d}, Interval{0x0109d, 0x0109d},
|
|
|
|
Interval{0x0135d, 0x0135f}, Interval{0x01712, 0x01714},
|
|
|
|
Interval{0x01732, 0x01734}, Interval{0x01752, 0x01753},
|
|
|
|
Interval{0x01772, 0x01773}, Interval{0x017b4, 0x017b5},
|
|
|
|
Interval{0x017b7, 0x017bd}, Interval{0x017c6, 0x017c6},
|
|
|
|
Interval{0x017c9, 0x017d3}, Interval{0x017dd, 0x017dd},
|
|
|
|
Interval{0x0180b, 0x0180d}, Interval{0x01885, 0x01886},
|
|
|
|
Interval{0x018a9, 0x018a9}, Interval{0x01920, 0x01922},
|
|
|
|
Interval{0x01927, 0x01928}, Interval{0x01932, 0x01932},
|
|
|
|
Interval{0x01939, 0x0193b}, Interval{0x01a17, 0x01a18},
|
|
|
|
Interval{0x01a1b, 0x01a1b}, Interval{0x01a56, 0x01a56},
|
|
|
|
Interval{0x01a58, 0x01a5e}, Interval{0x01a60, 0x01a60},
|
|
|
|
Interval{0x01a62, 0x01a62}, Interval{0x01a65, 0x01a6c},
|
|
|
|
Interval{0x01a73, 0x01a7c}, Interval{0x01a7f, 0x01a7f},
|
|
|
|
Interval{0x01ab0, 0x01ac0}, Interval{0x01b00, 0x01b03},
|
|
|
|
Interval{0x01b34, 0x01b34}, Interval{0x01b36, 0x01b3a},
|
|
|
|
Interval{0x01b3c, 0x01b3c}, Interval{0x01b42, 0x01b42},
|
|
|
|
Interval{0x01b6b, 0x01b73}, Interval{0x01b80, 0x01b81},
|
|
|
|
Interval{0x01ba2, 0x01ba5}, Interval{0x01ba8, 0x01ba9},
|
|
|
|
Interval{0x01bab, 0x01bad}, Interval{0x01be6, 0x01be6},
|
|
|
|
Interval{0x01be8, 0x01be9}, Interval{0x01bed, 0x01bed},
|
|
|
|
Interval{0x01bef, 0x01bf1}, Interval{0x01c2c, 0x01c33},
|
|
|
|
Interval{0x01c36, 0x01c37}, Interval{0x01cd0, 0x01cd2},
|
|
|
|
Interval{0x01cd4, 0x01ce0}, Interval{0x01ce2, 0x01ce8},
|
|
|
|
Interval{0x01ced, 0x01ced}, Interval{0x01cf4, 0x01cf4},
|
|
|
|
Interval{0x01cf8, 0x01cf9}, Interval{0x01dc0, 0x01df9},
|
|
|
|
Interval{0x01dfb, 0x01dff}, Interval{0x020d0, 0x020f0},
|
|
|
|
Interval{0x02cef, 0x02cf1}, Interval{0x02d7f, 0x02d7f},
|
|
|
|
Interval{0x02de0, 0x02dff}, Interval{0x0302a, 0x0302d},
|
|
|
|
Interval{0x03099, 0x0309a}, Interval{0x0a66f, 0x0a672},
|
|
|
|
Interval{0x0a674, 0x0a67d}, Interval{0x0a69e, 0x0a69f},
|
|
|
|
Interval{0x0a6f0, 0x0a6f1}, Interval{0x0a802, 0x0a802},
|
|
|
|
Interval{0x0a806, 0x0a806}, Interval{0x0a80b, 0x0a80b},
|
|
|
|
Interval{0x0a825, 0x0a826}, Interval{0x0a82c, 0x0a82c},
|
|
|
|
Interval{0x0a8c4, 0x0a8c5}, Interval{0x0a8e0, 0x0a8f1},
|
|
|
|
Interval{0x0a8ff, 0x0a8ff}, Interval{0x0a926, 0x0a92d},
|
|
|
|
Interval{0x0a947, 0x0a951}, Interval{0x0a980, 0x0a982},
|
|
|
|
Interval{0x0a9b3, 0x0a9b3}, Interval{0x0a9b6, 0x0a9b9},
|
|
|
|
Interval{0x0a9bc, 0x0a9bd}, Interval{0x0a9e5, 0x0a9e5},
|
|
|
|
Interval{0x0aa29, 0x0aa2e}, Interval{0x0aa31, 0x0aa32},
|
|
|
|
Interval{0x0aa35, 0x0aa36}, Interval{0x0aa43, 0x0aa43},
|
|
|
|
Interval{0x0aa4c, 0x0aa4c}, Interval{0x0aa7c, 0x0aa7c},
|
|
|
|
Interval{0x0aab0, 0x0aab0}, Interval{0x0aab2, 0x0aab4},
|
|
|
|
Interval{0x0aab7, 0x0aab8}, Interval{0x0aabe, 0x0aabf},
|
|
|
|
Interval{0x0aac1, 0x0aac1}, Interval{0x0aaec, 0x0aaed},
|
|
|
|
Interval{0x0aaf6, 0x0aaf6}, Interval{0x0abe5, 0x0abe5},
|
|
|
|
Interval{0x0abe8, 0x0abe8}, Interval{0x0abed, 0x0abed},
|
|
|
|
Interval{0x0fb1e, 0x0fb1e}, Interval{0x0fe00, 0x0fe0f},
|
|
|
|
Interval{0x0fe20, 0x0fe2f}, Interval{0x101fd, 0x101fd},
|
|
|
|
Interval{0x102e0, 0x102e0}, Interval{0x10376, 0x1037a},
|
|
|
|
Interval{0x10a01, 0x10a03}, Interval{0x10a05, 0x10a06},
|
|
|
|
Interval{0x10a0c, 0x10a0f}, Interval{0x10a38, 0x10a3a},
|
|
|
|
Interval{0x10a3f, 0x10a3f}, Interval{0x10ae5, 0x10ae6},
|
|
|
|
Interval{0x10d24, 0x10d27}, Interval{0x10eab, 0x10eac},
|
|
|
|
Interval{0x10f46, 0x10f50}, Interval{0x11001, 0x11001},
|
|
|
|
Interval{0x11038, 0x11046}, Interval{0x1107f, 0x11081},
|
|
|
|
Interval{0x110b3, 0x110b6}, Interval{0x110b9, 0x110ba},
|
|
|
|
Interval{0x11100, 0x11102}, Interval{0x11127, 0x1112b},
|
|
|
|
Interval{0x1112d, 0x11134}, Interval{0x11173, 0x11173},
|
|
|
|
Interval{0x11180, 0x11181}, Interval{0x111b6, 0x111be},
|
|
|
|
Interval{0x111c9, 0x111cc}, Interval{0x111cf, 0x111cf},
|
|
|
|
Interval{0x1122f, 0x11231}, Interval{0x11234, 0x11234},
|
|
|
|
Interval{0x11236, 0x11237}, Interval{0x1123e, 0x1123e},
|
|
|
|
Interval{0x112df, 0x112df}, Interval{0x112e3, 0x112ea},
|
|
|
|
Interval{0x11300, 0x11301}, Interval{0x1133b, 0x1133c},
|
|
|
|
Interval{0x11340, 0x11340}, Interval{0x11366, 0x1136c},
|
|
|
|
Interval{0x11370, 0x11374}, Interval{0x11438, 0x1143f},
|
|
|
|
Interval{0x11442, 0x11444}, Interval{0x11446, 0x11446},
|
|
|
|
Interval{0x1145e, 0x1145e}, Interval{0x114b3, 0x114b8},
|
|
|
|
Interval{0x114ba, 0x114ba}, Interval{0x114bf, 0x114c0},
|
|
|
|
Interval{0x114c2, 0x114c3}, Interval{0x115b2, 0x115b5},
|
|
|
|
Interval{0x115bc, 0x115bd}, Interval{0x115bf, 0x115c0},
|
|
|
|
Interval{0x115dc, 0x115dd}, Interval{0x11633, 0x1163a},
|
|
|
|
Interval{0x1163d, 0x1163d}, Interval{0x1163f, 0x11640},
|
|
|
|
Interval{0x116ab, 0x116ab}, Interval{0x116ad, 0x116ad},
|
|
|
|
Interval{0x116b0, 0x116b5}, Interval{0x116b7, 0x116b7},
|
|
|
|
Interval{0x1171d, 0x1171f}, Interval{0x11722, 0x11725},
|
|
|
|
Interval{0x11727, 0x1172b}, Interval{0x1182f, 0x11837},
|
|
|
|
Interval{0x11839, 0x1183a}, Interval{0x1193b, 0x1193c},
|
|
|
|
Interval{0x1193e, 0x1193e}, Interval{0x11943, 0x11943},
|
|
|
|
Interval{0x119d4, 0x119d7}, Interval{0x119da, 0x119db},
|
|
|
|
Interval{0x119e0, 0x119e0}, Interval{0x11a01, 0x11a0a},
|
|
|
|
Interval{0x11a33, 0x11a38}, Interval{0x11a3b, 0x11a3e},
|
|
|
|
Interval{0x11a47, 0x11a47}, Interval{0x11a51, 0x11a56},
|
|
|
|
Interval{0x11a59, 0x11a5b}, Interval{0x11a8a, 0x11a96},
|
|
|
|
Interval{0x11a98, 0x11a99}, Interval{0x11c30, 0x11c36},
|
|
|
|
Interval{0x11c38, 0x11c3d}, Interval{0x11c3f, 0x11c3f},
|
|
|
|
Interval{0x11c92, 0x11ca7}, Interval{0x11caa, 0x11cb0},
|
|
|
|
Interval{0x11cb2, 0x11cb3}, Interval{0x11cb5, 0x11cb6},
|
|
|
|
Interval{0x11d31, 0x11d36}, Interval{0x11d3a, 0x11d3a},
|
|
|
|
Interval{0x11d3c, 0x11d3d}, Interval{0x11d3f, 0x11d45},
|
|
|
|
Interval{0x11d47, 0x11d47}, Interval{0x11d90, 0x11d91},
|
|
|
|
Interval{0x11d95, 0x11d95}, Interval{0x11d97, 0x11d97},
|
|
|
|
Interval{0x11ef3, 0x11ef4}, Interval{0x16af0, 0x16af4},
|
|
|
|
Interval{0x16b30, 0x16b36}, Interval{0x16f4f, 0x16f4f},
|
|
|
|
Interval{0x16f8f, 0x16f92}, Interval{0x16fe4, 0x16fe4},
|
|
|
|
Interval{0x1bc9d, 0x1bc9e}, Interval{0x1d167, 0x1d169},
|
|
|
|
Interval{0x1d17b, 0x1d182}, Interval{0x1d185, 0x1d18b},
|
|
|
|
Interval{0x1d1aa, 0x1d1ad}, Interval{0x1d242, 0x1d244},
|
|
|
|
Interval{0x1da00, 0x1da36}, Interval{0x1da3b, 0x1da6c},
|
|
|
|
Interval{0x1da75, 0x1da75}, Interval{0x1da84, 0x1da84},
|
|
|
|
Interval{0x1da9b, 0x1da9f}, Interval{0x1daa1, 0x1daaf},
|
|
|
|
Interval{0x1e000, 0x1e006}, Interval{0x1e008, 0x1e018},
|
|
|
|
Interval{0x1e01b, 0x1e021}, Interval{0x1e023, 0x1e024},
|
|
|
|
Interval{0x1e026, 0x1e02a}, Interval{0x1e130, 0x1e136},
|
|
|
|
Interval{0x1e2ec, 0x1e2ef}, Interval{0x1e8d0, 0x1e8d6},
|
|
|
|
Interval{0x1e944, 0x1e94a}, Interval{0xe0100, 0xe01ef},
|
2021-08-09 05:25:20 +08:00
|
|
|
};
|
|
|
|
|
2022-09-29 16:50:27 +08:00
|
|
|
// As of Unicode 13.0.0
|
|
|
|
const std::array<Interval, 116> g_full_width_characters = {
|
|
|
|
Interval{0x01100, 0x0115f}, Interval{0x0231a, 0x0231b},
|
|
|
|
Interval{0x02329, 0x0232a}, Interval{0x023e9, 0x023ec},
|
|
|
|
Interval{0x023f0, 0x023f0}, Interval{0x023f3, 0x023f3},
|
|
|
|
Interval{0x025fd, 0x025fe}, Interval{0x02614, 0x02615},
|
|
|
|
Interval{0x02648, 0x02653}, Interval{0x0267f, 0x0267f},
|
|
|
|
Interval{0x02693, 0x02693}, Interval{0x026a1, 0x026a1},
|
|
|
|
Interval{0x026aa, 0x026ab}, Interval{0x026bd, 0x026be},
|
|
|
|
Interval{0x026c4, 0x026c5}, Interval{0x026ce, 0x026ce},
|
|
|
|
Interval{0x026d4, 0x026d4}, Interval{0x026ea, 0x026ea},
|
|
|
|
Interval{0x026f2, 0x026f3}, Interval{0x026f5, 0x026f5},
|
|
|
|
Interval{0x026fa, 0x026fa}, Interval{0x026fd, 0x026fd},
|
|
|
|
Interval{0x02705, 0x02705}, Interval{0x0270a, 0x0270b},
|
|
|
|
Interval{0x02728, 0x02728}, Interval{0x0274c, 0x0274c},
|
|
|
|
Interval{0x0274e, 0x0274e}, Interval{0x02753, 0x02755},
|
|
|
|
Interval{0x02757, 0x02757}, Interval{0x02795, 0x02797},
|
|
|
|
Interval{0x027b0, 0x027b0}, Interval{0x027bf, 0x027bf},
|
|
|
|
Interval{0x02b1b, 0x02b1c}, Interval{0x02b50, 0x02b50},
|
|
|
|
Interval{0x02b55, 0x02b55}, Interval{0x02e80, 0x02e99},
|
|
|
|
Interval{0x02e9b, 0x02ef3}, Interval{0x02f00, 0x02fd5},
|
|
|
|
Interval{0x02ff0, 0x02ffb}, Interval{0x03000, 0x0303e},
|
|
|
|
Interval{0x03041, 0x03096}, Interval{0x03099, 0x030ff},
|
|
|
|
Interval{0x03105, 0x0312f}, Interval{0x03131, 0x0318e},
|
|
|
|
Interval{0x03190, 0x031e3}, Interval{0x031f0, 0x0321e},
|
|
|
|
Interval{0x03220, 0x03247}, Interval{0x03250, 0x04dbf},
|
|
|
|
Interval{0x04e00, 0x0a48c}, Interval{0x0a490, 0x0a4c6},
|
|
|
|
Interval{0x0a960, 0x0a97c}, Interval{0x0ac00, 0x0d7a3},
|
|
|
|
Interval{0x0f900, 0x0faff}, Interval{0x0fe10, 0x0fe19},
|
|
|
|
Interval{0x0fe30, 0x0fe52}, Interval{0x0fe54, 0x0fe66},
|
|
|
|
Interval{0x0fe68, 0x0fe6b}, Interval{0x0ff01, 0x0ff60},
|
|
|
|
Interval{0x0ffe0, 0x0ffe6}, Interval{0x16fe0, 0x16fe4},
|
|
|
|
Interval{0x16ff0, 0x16ff1}, Interval{0x17000, 0x187f7},
|
|
|
|
Interval{0x18800, 0x18cd5}, Interval{0x18d00, 0x18d08},
|
|
|
|
Interval{0x1b000, 0x1b11e}, Interval{0x1b150, 0x1b152},
|
|
|
|
Interval{0x1b164, 0x1b167}, Interval{0x1b170, 0x1b2fb},
|
|
|
|
Interval{0x1f004, 0x1f004}, Interval{0x1f0cf, 0x1f0cf},
|
|
|
|
Interval{0x1f18e, 0x1f18e}, Interval{0x1f191, 0x1f19a},
|
|
|
|
Interval{0x1f200, 0x1f202}, Interval{0x1f210, 0x1f23b},
|
|
|
|
Interval{0x1f240, 0x1f248}, Interval{0x1f250, 0x1f251},
|
|
|
|
Interval{0x1f260, 0x1f265}, Interval{0x1f300, 0x1f320},
|
|
|
|
Interval{0x1f32d, 0x1f335}, Interval{0x1f337, 0x1f37c},
|
|
|
|
Interval{0x1f37e, 0x1f393}, Interval{0x1f3a0, 0x1f3ca},
|
|
|
|
Interval{0x1f3cf, 0x1f3d3}, Interval{0x1f3e0, 0x1f3f0},
|
|
|
|
Interval{0x1f3f4, 0x1f3f4}, Interval{0x1f3f8, 0x1f43e},
|
|
|
|
Interval{0x1f440, 0x1f440}, Interval{0x1f442, 0x1f4fc},
|
|
|
|
Interval{0x1f4ff, 0x1f53d}, Interval{0x1f54b, 0x1f54e},
|
|
|
|
Interval{0x1f550, 0x1f567}, Interval{0x1f57a, 0x1f57a},
|
|
|
|
Interval{0x1f595, 0x1f596}, Interval{0x1f5a4, 0x1f5a4},
|
|
|
|
Interval{0x1f5fb, 0x1f64f}, Interval{0x1f680, 0x1f6c5},
|
|
|
|
Interval{0x1f6cc, 0x1f6cc}, Interval{0x1f6d0, 0x1f6d2},
|
|
|
|
Interval{0x1f6d5, 0x1f6d7}, Interval{0x1f6eb, 0x1f6ec},
|
|
|
|
Interval{0x1f6f4, 0x1f6fc}, Interval{0x1f7e0, 0x1f7eb},
|
|
|
|
Interval{0x1f90c, 0x1f93a}, Interval{0x1f93c, 0x1f945},
|
|
|
|
Interval{0x1f947, 0x1f978}, Interval{0x1f97a, 0x1f9cb},
|
|
|
|
Interval{0x1f9cd, 0x1f9ff}, Interval{0x1fa70, 0x1fa74},
|
|
|
|
Interval{0x1fa78, 0x1fa7a}, Interval{0x1fa80, 0x1fa86},
|
|
|
|
Interval{0x1fa90, 0x1faa8}, Interval{0x1fab0, 0x1fab6},
|
|
|
|
Interval{0x1fac0, 0x1fac2}, Interval{0x1fad0, 0x1fad6},
|
|
|
|
Interval{0x20000, 0x2fffd}, Interval{0x30000, 0x3fffd},
|
2021-08-09 05:25:20 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
// Find a codepoint inside a sorted list of Interval.
|
2022-03-31 08:17:43 +08:00
|
|
|
bool Bisearch(uint32_t ucs, const Interval* table, int max) {
|
|
|
|
if (ucs < table[0].first || ucs > table[max].last) { // NOLINT
|
|
|
|
return false;
|
|
|
|
}
|
2021-08-09 05:25:20 +08:00
|
|
|
|
|
|
|
int min = 0;
|
|
|
|
while (max >= min) {
|
|
|
|
int mid = (min + max) / 2;
|
2022-03-31 08:17:43 +08:00
|
|
|
if (ucs > table[mid].last) { // NOLINT
|
2021-08-09 05:25:20 +08:00
|
|
|
min = mid + 1;
|
2022-03-31 08:17:43 +08:00
|
|
|
} else if (ucs < table[mid].first) { // NOLINT
|
2021-08-09 05:25:20 +08:00
|
|
|
max = mid - 1;
|
2022-03-31 08:17:43 +08:00
|
|
|
} else {
|
|
|
|
return true;
|
|
|
|
}
|
2021-08-09 05:25:20 +08:00
|
|
|
}
|
|
|
|
|
2022-03-31 08:17:43 +08:00
|
|
|
return false;
|
2021-08-09 05:25:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
bool IsCombining(uint32_t ucs) {
|
2022-03-31 08:17:43 +08:00
|
|
|
return Bisearch(ucs, g_combining_characters.data(),
|
|
|
|
g_combining_characters.size() - 1);
|
2021-08-09 05:25:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
bool IsFullWidth(uint32_t ucs) {
|
2022-03-31 08:17:43 +08:00
|
|
|
if (ucs < 0x0300) // Quick path: // NOLINT
|
2021-08-09 05:25:20 +08:00
|
|
|
return false;
|
|
|
|
|
2022-03-31 08:17:43 +08:00
|
|
|
return Bisearch(ucs, g_full_width_characters.data(),
|
|
|
|
g_full_width_characters.size() - 1);
|
2021-08-09 05:25:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Returns true when |ucs| is a control character, i.e. when it lies in
// [0x00, 0x1f] or in [0x7f, 0x9f].
bool IsControl(uint32_t ucs) {
  const bool in_low_range = ucs < 32;                    // NOLINT
  const bool in_high_range = ucs >= 0x7f && ucs < 0xa0;  // NOLINT
  return in_low_range || in_high_range;
}
|
|
|
|
|
|
|
|
int codepoint_width(uint32_t ucs) {
|
2022-03-31 08:17:43 +08:00
|
|
|
if (IsControl(ucs)) {
|
2021-08-09 05:25:20 +08:00
|
|
|
return -1;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-08-09 05:25:20 +08:00
|
|
|
|
2022-03-31 08:17:43 +08:00
|
|
|
if (IsCombining(ucs)) {
|
2021-08-09 05:25:20 +08:00
|
|
|
return 0;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-08-09 05:25:20 +08:00
|
|
|
|
2022-03-31 08:17:43 +08:00
|
|
|
if (IsFullWidth(ucs)) {
|
2021-08-09 05:25:20 +08:00
|
|
|
return 2;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-08-09 05:25:20 +08:00
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Decode one codepoint from the UTF8 encoded string |input|, reading between
// 1 and 4 bytes starting at the byte offset |start|.
//
// On success, the codepoint is written into |ucs|, |end| receives the offset
// of the first byte following the sequence, and true is returned.
//
// On failure (|start| out of range, invalid leading byte, truncated sequence,
// or a continuation byte not of the form 0b10xx'xxxx), false is returned,
// |ucs| is left untouched and |end| is set to |start + 1|, so that callers
// always make progress when calling this function repeatedly.
bool EatCodePoint(const std::string& input,
                  size_t start,
                  size_t* end,
                  uint32_t* ucs) {
  // Default outcome: exactly one byte is skipped. |start| is received by
  // value, so this is safe even when |end| points to the caller's own cursor.
  *end = start + 1;
  if (start >= input.size()) {
    return false;
  }

  const uint8_t lead = static_cast<uint8_t>(input[start]);

  // 1 byte string.
  if ((lead & 0b1000'0000) == 0b0000'0000) {  // NOLINT
    *ucs = lead & 0b0111'1111;                // NOLINT
    return true;
  }

  // Multi-byte string: the leading byte gives the length of the sequence and
  // the most significant bits of the codepoint.
  size_t length = 0;
  uint32_t value = 0;
  if ((lead & 0b1110'0000) == 0b1100'0000) {  // NOLINT
    length = 2;
    value = lead & 0b0001'1111;                      // NOLINT
  } else if ((lead & 0b1111'0000) == 0b1110'0000) {  // NOLINT
    length = 3;                                      // NOLINT
    value = lead & 0b0000'1111;                      // NOLINT
  } else if ((lead & 0b1111'1000) == 0b1111'0000) {  // NOLINT
    length = 4;                                      // NOLINT
    value = lead & 0b0000'0111;                      // NOLINT
  } else {
    // Stray continuation byte, or a byte which never starts a sequence.
    return false;
  }

  // Truncated sequence. |start < input.size()| holds here, hence the
  // subtraction cannot wrap around.
  if (length > input.size() - start) {
    return false;
  }

  // Every following byte must be a continuation byte. Without this check, a
  // stray leading byte would swallow the valid characters following it.
  for (size_t i = 1; i < length; ++i) {
    const uint8_t continuation = static_cast<uint8_t>(input[start + i]);
    if ((continuation & 0b1100'0000) != 0b1000'0000) {  // NOLINT
      return false;
    }
    value <<= 6;                           // NOLINT
    value += continuation & 0b0011'1111;   // NOLINT
  }

  *ucs = value;
  *end = start + length;
  return true;
}
|
|
|
|
|
|
|
|
} // namespace
|
2018-09-18 14:48:40 +08:00
|
|
|
|
2020-08-09 20:53:56 +08:00
|
|
|
namespace ftxui {
|
2021-08-09 05:25:20 +08:00
|
|
|
int wchar_width(wchar_t ucs) {
|
|
|
|
return codepoint_width(uint32_t(ucs));
|
|
|
|
}
|
|
|
|
|
|
|
|
int wstring_width(const std::wstring& text) {
|
|
|
|
int width = 0;
|
|
|
|
|
|
|
|
for (const wchar_t& it : text) {
|
|
|
|
int w = wchar_width(it);
|
2022-03-31 08:17:43 +08:00
|
|
|
if (w < 0) {
|
2021-08-09 05:25:20 +08:00
|
|
|
return -1;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-08-09 05:25:20 +08:00
|
|
|
width += w;
|
|
|
|
}
|
|
|
|
return width;
|
|
|
|
}
|
|
|
|
|
|
|
|
int string_width(const std::string& input) {
|
|
|
|
int width = 0;
|
|
|
|
size_t start = 0;
|
|
|
|
while (start < input.size()) {
|
|
|
|
uint32_t codepoint = 0;
|
2022-03-31 08:17:43 +08:00
|
|
|
if (!EatCodePoint(input, start, &start, &codepoint)) {
|
2021-08-09 05:25:20 +08:00
|
|
|
continue;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-08-09 05:25:20 +08:00
|
|
|
|
2022-03-31 08:17:43 +08:00
|
|
|
if (IsControl(codepoint)) {
|
2021-08-09 05:25:20 +08:00
|
|
|
continue;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-08-09 05:25:20 +08:00
|
|
|
|
2022-03-31 08:17:43 +08:00
|
|
|
if (IsCombining(codepoint)) {
|
2021-08-09 05:25:20 +08:00
|
|
|
continue;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-08-09 05:25:20 +08:00
|
|
|
|
|
|
|
if (IsFullWidth(codepoint)) {
|
|
|
|
width += 2;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
width += 1;
|
|
|
|
}
|
|
|
|
return width;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<std::string> Utf8ToGlyphs(const std::string& input) {
|
|
|
|
std::vector<std::string> out;
|
|
|
|
std::string current;
|
|
|
|
out.reserve(input.size());
|
|
|
|
size_t start = 0;
|
|
|
|
size_t end = 0;
|
|
|
|
while (start < input.size()) {
|
2022-03-31 08:17:43 +08:00
|
|
|
uint32_t codepoint = 0;
|
2021-08-09 05:25:20 +08:00
|
|
|
if (!EatCodePoint(input, start, &end, &codepoint)) {
|
|
|
|
start = end;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string append = input.substr(start, end - start);
|
|
|
|
start = end;
|
|
|
|
|
|
|
|
// Ignore control characters.
|
2022-03-31 08:17:43 +08:00
|
|
|
if (IsControl(codepoint)) {
|
2021-08-09 05:25:20 +08:00
|
|
|
continue;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-08-09 05:25:20 +08:00
|
|
|
|
|
|
|
// Combining characters are put with the previous glyph they are modifying.
|
|
|
|
if (IsCombining(codepoint)) {
|
2022-03-31 08:17:43 +08:00
|
|
|
if (!out.empty()) {
|
2021-08-09 05:25:20 +08:00
|
|
|
out.back() += append;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-08-09 05:25:20 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fullwidth characters take two cells. The second is made of the empty
|
|
|
|
// string to reserve the space the first is taking.
|
|
|
|
if (IsFullWidth(codepoint)) {
|
|
|
|
out.push_back(append);
|
2022-03-31 08:17:43 +08:00
|
|
|
out.emplace_back("");
|
2021-08-09 05:25:20 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Normal characters:
|
|
|
|
out.push_back(append);
|
|
|
|
}
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
2022-03-31 08:17:43 +08:00
|
|
|
int GlyphPosition(const std::string& input, size_t glyph_index, size_t start) {
|
|
|
|
if (glyph_index <= 0) {
|
2021-12-13 04:31:54 +08:00
|
|
|
return 0;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-12-13 04:31:54 +08:00
|
|
|
size_t end = 0;
|
|
|
|
while (start < input.size()) {
|
2022-03-31 08:17:43 +08:00
|
|
|
uint32_t codepoint = 0;
|
2021-12-13 04:31:54 +08:00
|
|
|
bool eaten = EatCodePoint(input, start, &end, &codepoint);
|
|
|
|
|
|
|
|
// Ignore invalid, control characters and combining characters.
|
|
|
|
if (!eaten || IsControl(codepoint) || IsCombining(codepoint)) {
|
|
|
|
start = end;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// We eat the beginning of the next glyph. If we are eating the one
|
|
|
|
// requested, return its start position immediately.
|
2022-03-31 08:17:43 +08:00
|
|
|
if (glyph_index == 0) {
|
|
|
|
return static_cast<int>(start);
|
|
|
|
}
|
2021-12-13 04:31:54 +08:00
|
|
|
|
|
|
|
// Otherwise, skip this glyph and iterate:
|
2022-03-31 08:17:43 +08:00
|
|
|
glyph_index--;
|
2021-12-13 04:31:54 +08:00
|
|
|
start = end;
|
|
|
|
}
|
2022-03-31 08:17:43 +08:00
|
|
|
return static_cast<int>(input.size());
|
2021-12-13 04:31:54 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<int> CellToGlyphIndex(const std::string& input) {
|
|
|
|
int x = -1;
|
|
|
|
std::vector<int> out;
|
|
|
|
out.reserve(input.size());
|
|
|
|
size_t start = 0;
|
|
|
|
size_t end = 0;
|
|
|
|
while (start < input.size()) {
|
2022-03-31 08:17:43 +08:00
|
|
|
uint32_t codepoint = 0;
|
2021-12-13 04:31:54 +08:00
|
|
|
bool eaten = EatCodePoint(input, start, &end, &codepoint);
|
|
|
|
start = end;
|
|
|
|
|
|
|
|
// Ignore invalid / control characters.
|
2022-03-31 08:17:43 +08:00
|
|
|
if (!eaten || IsControl(codepoint)) {
|
2021-12-13 04:31:54 +08:00
|
|
|
continue;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-12-13 04:31:54 +08:00
|
|
|
|
|
|
|
// Combining characters are put with the previous glyph they are modifying.
|
|
|
|
if (IsCombining(codepoint)) {
|
|
|
|
if (x == -1) {
|
|
|
|
++x;
|
|
|
|
out.push_back(x);
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fullwidth characters take two cells. The second is made of the empty
|
|
|
|
// string to reserve the space the first is taking.
|
|
|
|
if (IsFullWidth(codepoint)) {
|
|
|
|
++x;
|
|
|
|
out.push_back(x);
|
|
|
|
out.push_back(x);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Normal characters:
|
|
|
|
++x;
|
|
|
|
out.push_back(x);
|
|
|
|
}
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
|
|
|
int GlyphCount(const std::string& input) {
|
|
|
|
int size = 0;
|
|
|
|
size_t start = 0;
|
|
|
|
size_t end = 0;
|
|
|
|
while (start < input.size()) {
|
2022-03-31 08:17:43 +08:00
|
|
|
uint32_t codepoint = 0;
|
2021-12-13 04:31:54 +08:00
|
|
|
bool eaten = EatCodePoint(input, start, &end, &codepoint);
|
|
|
|
start = end;
|
|
|
|
|
|
|
|
// Ignore invalid characters:
|
2022-03-31 08:17:43 +08:00
|
|
|
if (!eaten || IsControl(codepoint)) {
|
2021-12-13 04:31:54 +08:00
|
|
|
continue;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-12-13 04:31:54 +08:00
|
|
|
|
|
|
|
// Ignore combining characters, except when they don't have a preceding to
|
|
|
|
// combine with.
|
|
|
|
if (IsCombining(codepoint)) {
|
2022-03-31 08:17:43 +08:00
|
|
|
if (size == 0) {
|
2021-12-13 04:31:54 +08:00
|
|
|
size++;
|
2022-03-31 08:17:43 +08:00
|
|
|
}
|
2021-12-13 04:31:54 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
size++;
|
|
|
|
}
|
|
|
|
return size;
|
|
|
|
}
|
|
|
|
|
2020-03-23 14:12:06 +08:00
|
|
|
#ifdef _MSC_VER
|
2020-08-16 06:24:18 +08:00
|
|
|
#pragma warning(push)
|
|
|
|
#pragma warning(disable : 4996) // codecvt_utf8_utf16 is deprecated
|
2020-03-23 14:12:06 +08:00
|
|
|
#endif
|
|
|
|
|
2020-08-16 08:24:50 +08:00
|
|
|
/// Convert a std::wstring into a UTF8 std::string.
/// The conversion is made by std::wstring_convert, using the
/// std::codecvt_utf8_utf16<wchar_t> facet.
std::string to_string(const std::wstring& s) {
  using Utf8Converter = std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>;
  Utf8Converter converter;
  return converter.to_bytes(s);
}
|
|
|
|
|
2020-08-16 08:24:50 +08:00
|
|
|
/// Convert a UTF8 std::string into a std::wstring.
/// The conversion is made by std::wstring_convert, using the
/// std::codecvt_utf8_utf16<wchar_t> facet.
std::wstring to_wstring(const std::string& s) {
  using Utf8Converter = std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>>;
  Utf8Converter converter;
  return converter.from_bytes(s);
}
|
2020-03-23 14:12:06 +08:00
|
|
|
|
|
|
|
#ifdef _MSC_VER
|
2020-08-16 06:24:18 +08:00
|
|
|
#pragma warning(pop)
|
2020-08-09 20:53:56 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
} // namespace ftxui
|
2020-08-16 06:24:18 +08:00
|
|
|
|
|
|
|
// Copyright 2020 Arthur Sonzogni. All rights reserved.
|
|
|
|
// Use of this source code is governed by the MIT license that can be found in
|
|
|
|
// the LICENSE file.
|