From a378c619485cb900c2de8f1aa768ffaf981d2320 Mon Sep 17 00:00:00 2001 From: Emilia Bopp Date: Sun, 10 Nov 2024 14:39:21 +0100 Subject: [PATCH] libutil: fix non-ASCII chars in URL encoding Due to the cast to (unsigned int), the encoding appended broken bytes padding. This is fixed here with a bitmask. Fixes: https://git.lix.systems/lix-project/lix/issues/562 Change-Id: I0c93bd2b8c2f82df208d4693b7254544e3121dc3 --- src/libutil/url.cc | 2 +- tests/unit/libutil/url.cc | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/libutil/url.cc b/src/libutil/url.cc index 2de50dd4d..f0586189b 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -104,7 +104,7 @@ std::string percentEncode(std::string_view s, std::string_view keep) || keep.find(c) != std::string::npos) res += c; else - res += fmt("%%%02X", (unsigned int) c); + res += fmt("%%%02X", 0xff & (unsigned int) c); return res; } diff --git a/tests/unit/libutil/url.cc b/tests/unit/libutil/url.cc index bfd9a228a..ecf544618 100644 --- a/tests/unit/libutil/url.cc +++ b/tests/unit/libutil/url.cc @@ -303,7 +303,6 @@ namespace nix { ASSERT_EQ(d, s); } - /* ---------------------------------------------------------------------------- * percentEncode * --------------------------------------------------------------------------*/ @@ -336,4 +335,10 @@ namespace nix { ASSERT_EQ(d, s); } + TEST(percentEncode, utf8Input) { + std::string s = percentEncode("ä"); + std::string d = "%C3%A4"; + + ASSERT_EQ(d, s); + } }