From ef84c780bb901011e090b9f12d293d136193a428 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 16 Nov 2020 16:26:29 +0100 Subject: [PATCH] filterANSIEscapes(): Handle UTF-8 characters --- src/libutil/tests/tests.cc | 10 ++++++++++ src/libutil/util.cc | 23 ++++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/libutil/tests/tests.cc b/src/libutil/tests/tests.cc index ffba832d8..35a5d27bb 100644 --- a/src/libutil/tests/tests.cc +++ b/src/libutil/tests/tests.cc @@ -587,4 +587,14 @@ namespace nix { ASSERT_EQ(filterANSIEscapes(s, true), "foo bar baz" ); } + + TEST(filterANSIEscapes, utf8) { + ASSERT_EQ(filterANSIEscapes("foobar", true, 5), "fooba"); + ASSERT_EQ(filterANSIEscapes("fóóbär", true, 6), "fóóbär"); + ASSERT_EQ(filterANSIEscapes("fóóbär", true, 5), "fóóbä"); + ASSERT_EQ(filterANSIEscapes("fóóbär", true, 3), "fóó"); + ASSERT_EQ(filterANSIEscapes("f€€bär", true, 4), "f€€b"); + ASSERT_EQ(filterANSIEscapes("f𐍈𐍈bär", true, 4), "f𐍈𐍈b"); + } + } diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 53342b5cb..01ab9111f 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -1409,7 +1409,28 @@ std::string filterANSIEscapes(const std::string & s, bool filterAll, unsigned in i++; else { - t += *i++; w++; + w++; + // Copy one UTF-8 character. + if ((*i & 0xe0) == 0xc0) { + t += *i++; + if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++; + } else if ((*i & 0xf0) == 0xe0) { + t += *i++; + if (i != s.end() && ((*i & 0xc0) == 0x80)) { + t += *i++; + if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++; + } + } else if ((*i & 0xf8) == 0xf0) { + t += *i++; + if (i != s.end() && ((*i & 0xc0) == 0x80)) { + t += *i++; + if (i != s.end() && ((*i & 0xc0) == 0x80)) { + t += *i++; + if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++; + } + } + } else + t += *i++; } }