Feature: Add multi-line input. (#630)

This commit is contained in:
Arthur Sonzogni
2023-05-02 13:32:37 +02:00
committed by GitHub
parent 6215240b37
commit 7de4f8683d
33 changed files with 1471 additions and 690 deletions

View File

@@ -7,15 +7,22 @@
#include "ftxui/screen/string.hpp"
#include <array> // for array
#include <cstdint> // for uint32_t, uint8_t, uint16_t, int32_t
#include <string> // for string, basic_string, wstring
#include <tuple> // for _Swallow_assign, ignore
#include <stddef.h> // for size_t
#include <array> // for array
#include <cstdint> // for uint32_t, uint8_t, uint16_t, int32_t
#include <string> // for string, basic_string, wstring
#include <tuple> // for _Swallow_assign, ignore
#include "ftxui/screen/deprecated.hpp" // for wchar_width, wstring_width
#include "ftxui/screen/deprecated.hpp" // for wchar_width, wstring_width
#include "ftxui/screen/string_internal.hpp" // for WordBreakProperty, EatCodePoint, CodepointToWordBreakProperty, GlyphCount, GlyphIterate, GlyphNext, GlyphPrevious, IsCombining, IsControl, IsFullWidth, Utf8ToWordBreakProperty
namespace {
using ftxui::EatCodePoint;
using ftxui::IsCombining;
using ftxui::IsControl;
using ftxui::IsFullWidth;
struct Interval {
uint32_t first;
uint32_t last;
@@ -1411,46 +1418,26 @@ bool Bisearch(uint32_t ucs, const std::array<C, N> table, C* out) {
return false;
}
// Returns true when the word-break property of |ucs| is `Extend`; such
// codepoints are the ones this file treats as combining characters.
bool IsCombining(uint32_t ucs) {
  const auto property = ftxui::CodepointToWordBreakProperty(ucs);
  return property == WBP::Extend;
}
// Returns true when |ucs| is found in the table of full-width characters.
bool IsFullWidth(uint32_t ucs) {
  // Quick path: codepoints below U+0300 are never looked up in the table.
  constexpr uint32_t kFirstCandidate = 0x0300;
  const bool needs_lookup = ucs >= kFirstCandidate;
  return needs_lookup && Bisearch(ucs, g_full_width_characters);
}
// Returns true when |ucs| lies in [0, 32) or in [0x7f, 0xa0), the two ranges
// this file treats as control characters.
bool IsControl(uint32_t ucs) {
  const bool in_low_range = ucs < 32;                     // NOLINT
  const bool in_high_range = ucs >= 0x7f && ucs < 0xa0;   // NOLINT
  return in_low_range || in_high_range;
}
// Returns the width assigned to the codepoint |ucs|:
//   -1 when it is a control character,
//    0 when it is a combining character,
//    2 when it is a full-width character,
//    1 otherwise.
// The checks are ordered: the first matching category wins.
int codepoint_width(uint32_t ucs) {
  if (ftxui::IsControl(ucs)) {
    return -1;
  }

  if (ftxui::IsCombining(ucs)) {
    return 0;
  }

  if (ftxui::IsFullWidth(ucs)) {
    return 2;
  }

  return 1;
}
} // namespace
namespace ftxui {
// From UTF8 encoded string |input|, eat in between 1 and 4 bytes representing
// one codepoint. Put the codepoint into |ucs|. Start at |start| and update
// |end| to represent the beginning of the next byte to eat for consecutive
@@ -1563,9 +1550,29 @@ bool EatCodePoint(const std::wstring& input,
return true;
}
} // namespace
// A codepoint is considered combining when its word-break property is
// `Extend`.
bool IsCombining(uint32_t ucs) {
  const bool is_extend =
      WBP::Extend == ftxui::CodepointToWordBreakProperty(ucs);
  return is_extend;
}
namespace ftxui {
// Reports whether |ucs| belongs to the full-width character table.
bool IsFullWidth(uint32_t ucs) {
  // Quick path: everything below U+0300 is answered without a table lookup.
  const bool below_table = ucs < 0x0300;  // NOLINT
  return below_table ? false : Bisearch(ucs, g_full_width_characters);
}
// Returns true when |ucs| is treated as a control character: any value in
// [0, 32) except line feed (10), or any value in [0x7f, 0xa0).
bool IsControl(uint32_t ucs) {
  constexpr uint32_t kLineFeed = 10;

  if (ucs < 32) {  // NOLINT
    // Line feed is deliberately not reported as a control character.
    return ucs != kLineFeed;
  }

  return ucs >= 0x7f && ucs < 0xa0;  // NOLINT
}
WordBreakProperty CodepointToWordBreakProperty(uint32_t codepoint) {
WordBreakPropertyInterval interval = {0, 0, WBP::ALetter};
@@ -1660,12 +1667,35 @@ std::vector<std::string> Utf8ToGlyphs(const std::string& input) {
return out;
}
int GlyphPosition(const std::string& input, size_t glyph_index, size_t start) {
if (glyph_index <= 0) {
return 0;
// Returns the byte index in |input| of the closest glyph start located
// strictly before |start|, or 0 when there is none.
//
// Walking backward one byte at a time, the following positions are skipped:
//  - UTF8 continuation bytes (they are not the beginning of a codepoint),
//  - positions where no valid codepoint can be decoded,
//  - control and combining codepoints (they do not begin a glyph).
//
// |start| values larger than |input.size()| are clamped to the end of the
// string so that |input| is never indexed out of range.
size_t GlyphPrevious(const std::string& input, size_t start) {
  if (start > input.size()) {
    start = input.size();
  }

  while (start > 0) {
    --start;

    // Skip the UTF8 continuation bytes.
    if ((input[start] & 0b1100'0000) == 0b1000'0000) {
      continue;
    }

    uint32_t codepoint = 0;
    size_t end = 0;
    const bool eaten = EatCodePoint(input, start, &end, &codepoint);

    // Ignore invalid, control characters and combining characters.
    if (!eaten || IsControl(codepoint) || IsCombining(codepoint)) {
      continue;
    }

    return start;
  }

  // The beginning of the string was reached.
  return 0;
}
size_t GlyphNext(const std::string& input, size_t start) {
bool glyph_found = false;
while (start < input.size()) {
size_t end = 0;
uint32_t codepoint = 0;
const bool eaten = EatCodePoint(input, start, &end, &codepoint);
@@ -1677,17 +1707,31 @@ int GlyphPosition(const std::string& input, size_t glyph_index, size_t start) {
// We eat the beginning of the next glyph. If we are eating the one
// requested, return its start position immediately.
if (glyph_index == 0) {
if (glyph_found) {
return static_cast<int>(start);
}
// Otherwise, skip this glyph and iterate:
glyph_index--;
glyph_found = true;
start = end;
}
return static_cast<int>(input.size());
}
// Moves |glyph_offset| glyphs away from the byte index |start| in |input| and
// returns the resulting byte index.
//  - A positive offset applies GlyphNext that many times.
//  - A negative offset applies GlyphPrevious that many times.
//  - A zero offset returns |start| unchanged.
size_t GlyphIterate(const std::string& input, int glyph_offset, size_t start) {
  // The offset is counted toward zero rather than negated: `-glyph_offset`
  // would overflow (undefined behavior) when |glyph_offset| is INT_MIN.
  for (; glyph_offset > 0; --glyph_offset) {
    start = GlyphNext(input, start);
  }
  for (; glyph_offset < 0; ++glyph_offset) {
    start = GlyphPrevious(input, start);
  }
  return start;
}
std::vector<int> CellToGlyphIndex(const std::string& input) {
int x = -1;
std::vector<int> out;

View File

@@ -0,0 +1,64 @@
#ifndef FTXUI_SCREEN_STRING_INTERNAL_HPP
#define FTXUI_SCREEN_STRING_INTERNAL_HPP
// Internal string helpers shared between the string implementation and its
// tests.
// NOTE(review): the declarations below use std::string, std::wstring,
// std::vector, size_t and uint32_t, but no #include is visible in this
// header — confirm that every includer provides them first.
namespace ftxui {
// Decodes one codepoint from the UTF8 encoded |input|, beginning at byte index
// |start|. The codepoint is written to |ucs| and |end| is updated to the index
// of the next byte to read.
// NOTE(review): callers treat a `false` result as "invalid input" — confirm
// against the definition.
bool EatCodePoint(const std::string& input,
size_t start,
size_t* end,
uint32_t* ucs);
// Same interface for a wide string input.
// NOTE(review): the definition is not visible here; presumably one element of
// |input| is consumed per call — confirm.
bool EatCodePoint(const std::wstring& input,
size_t start,
size_t* end,
uint32_t* ucs);
// True when the word-break property of |ucs| is `Extend`.
bool IsCombining(uint32_t ucs);
// True when |ucs| is at least 0x0300 and is found in the full-width character
// table.
bool IsFullWidth(uint32_t ucs);
// True when |ucs| is in [0, 32) except line feed (10), or in [0x7f, 0xa0).
bool IsControl(uint32_t ucs);
// Returns the byte index of the closest glyph start located strictly before
// |start|, or 0 when there is none. Continuation bytes, invalid sequences,
// control characters and combining characters are skipped.
size_t GlyphPrevious(const std::string& input, size_t start);
// NOTE(review): presumably the forward counterpart of GlyphPrevious, returning
// |input.size()| when no further glyph exists — confirm against the
// definition.
size_t GlyphNext(const std::string& input, size_t start);
// Return the index in the |input| string of the glyph at |glyph_offset|,
// starting at |start|. A positive offset walks forward with GlyphNext, a
// negative one walks backward with GlyphPrevious.
size_t GlyphIterate(const std::string& input,
int glyph_offset,
size_t start = 0);
// Returns the number of glyphs in |input|.
int GlyphCount(const std::string& input);
// Properties from:
// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/WordBreakProperty.txt
// The enumerators are listed in alphabetical order.
enum class WordBreakProperty {
ALetter,
CR,
Double_Quote,
Extend,
ExtendNumLet,
Format,
Hebrew_Letter,
Katakana,
LF,
MidLetter,
MidNum,
MidNumLet,
Newline,
Numeric,
Regional_Indicator,
Single_Quote,
WSegSpace,
ZWJ,
};
// Returns the word-break property associated with |codepoint|.
WordBreakProperty CodepointToWordBreakProperty(uint32_t codepoint);
// Returns the word-break properties computed for the UTF8 encoded |input|.
// NOTE(review): the tests show "\r" producing an empty result (marked FIXME),
// so the output does not always hold one entry per codepoint.
std::vector<WordBreakProperty> Utf8ToWordBreakProperty(
const std::string& input);
// NOTE(review): the definition is not visible here; presumably reports whether
// the glyph at |glyph_index| in |input| separates words — confirm.
bool IsWordBreakingCharacter(const std::string& input, size_t glyph_index);
} // namespace ftxui
#endif /* end of include guard: FTXUI_SCREEN_STRING_INTERNAL_HPP */
// Copyright 2023 Arthur Sonzogni. All rights reserved.
// Use of this source code is governed by the MIT license that can be found in
// the LICENSE file.

View File

@@ -1,6 +1,7 @@
#include "ftxui/screen/string.hpp"
#include <gtest/gtest.h>
#include <string> // for allocator, string
#include "ftxui/screen/string_internal.hpp"
namespace ftxui {
@@ -61,41 +62,41 @@ TEST(StringTest, GlyphCount) {
EXPECT_EQ(GlyphCount("a\1a"), 2);
}
// Checks the byte index returned by GlyphIterate for ASCII, full-width,
// combining and control characters. The third argument, when present, is the
// byte index the iteration starts from.
TEST(StringTest, GlyphIterate) {
  // Basic:
  EXPECT_EQ(GlyphIterate("", -1), 0);
  EXPECT_EQ(GlyphIterate("", 0), 0);
  EXPECT_EQ(GlyphIterate("", 1), 0);
  EXPECT_EQ(GlyphIterate("a", 0), 0);
  EXPECT_EQ(GlyphIterate("a", 1), 1);
  EXPECT_EQ(GlyphIterate("ab", 0), 0);
  EXPECT_EQ(GlyphIterate("ab", 1), 1);
  EXPECT_EQ(GlyphIterate("ab", 2), 2);
  EXPECT_EQ(GlyphIterate("abc", 0), 0);
  EXPECT_EQ(GlyphIterate("abc", 1), 1);
  EXPECT_EQ(GlyphIterate("abc", 2), 2);
  EXPECT_EQ(GlyphIterate("abc", 3), 3);

  // Fullwidth glyphs:
  // A single full-width glyph encoded on 3 bytes.
  EXPECT_EQ(GlyphIterate("测", 0), 0);
  EXPECT_EQ(GlyphIterate("测", 1), 3);
  EXPECT_EQ(GlyphIterate("测试", 0), 0);
  EXPECT_EQ(GlyphIterate("测试", 1), 3);
  EXPECT_EQ(GlyphIterate("测试", 2), 6);
  EXPECT_EQ(GlyphIterate("测试", 1, 3), 6);
  EXPECT_EQ(GlyphIterate("测试", 1, 0), 3);

  // Combining characters:
  // 'a' followed by U+0304 (combining macron, 2 bytes): one glyph on 3 bytes.
  // Spelled with explicit bytes so that it cannot be confused with the
  // precomposed U+0101, which is only 2 bytes long.
  EXPECT_EQ(GlyphIterate("a\xCC\x84", 0), 0);
  EXPECT_EQ(GlyphIterate("a\xCC\x84", 1), 3);
  EXPECT_EQ(GlyphIterate("a⃒a̗ā", 0), 0);
  EXPECT_EQ(GlyphIterate("a⃒a̗ā", 1), 4);
  EXPECT_EQ(GlyphIterate("a⃒a̗ā", 2), 7);
  EXPECT_EQ(GlyphIterate("a⃒a̗ā", 3), 10);

  // Control characters:
  EXPECT_EQ(GlyphIterate("\1", 0), 0);
  EXPECT_EQ(GlyphIterate("\1", 1), 1);
  EXPECT_EQ(GlyphIterate("a\1a", 0), 0);
  EXPECT_EQ(GlyphIterate("a\1a", 1), 2);
  EXPECT_EQ(GlyphIterate("a\1a", 2), 3);
}
TEST(StringTest, CellToGlyphIndex) {
@@ -135,7 +136,7 @@ TEST(StringTest, Utf8ToWordBreakProperty) {
EXPECT_EQ(Utf8ToWordBreakProperty(":"), T({P::MidLetter}));
EXPECT_EQ(Utf8ToWordBreakProperty("."), T({P::MidNumLet}));
EXPECT_EQ(Utf8ToWordBreakProperty("\r"), T({})); // FIXME
EXPECT_EQ(Utf8ToWordBreakProperty("\n"), T({})); // FIXME
EXPECT_EQ(Utf8ToWordBreakProperty("\n"), T({P::LF}));
}
TEST(StringTest, to_string) {