From 1425aa0b7cd0d3477589f75bea4fb9c74e057fed Mon Sep 17 00:00:00 2001 From: Qyriad Date: Tue, 30 Apr 2024 18:11:14 -0600 Subject: [PATCH] implement parsing human-readable names from URLs Based off of commit 257b768436a0e8ab7887f9b790c5b92a7fe51ef5 Upstream-PR: https://github.com/NixOS/nix/pull/8678 Co-authored-by: Felix Uhl Change-Id: Idcb7f6191ca3310ef9dc854197f7798260c3f71d --- src/libutil/meson.build | 2 + src/libutil/url-name.cc | 59 ++++++++++++++++++++++++++++++ src/libutil/url-name.hh | 26 +++++++++++++ src/libutil/url-parts.hh | 1 + src/libutil/url.cc | 2 +- tests/unit/libutil/url-name.cc | 67 ++++++++++++++++++++++++++++++++++ tests/unit/meson.build | 1 + 7 files changed, 157 insertions(+), 1 deletion(-) create mode 100644 src/libutil/url-name.cc create mode 100644 src/libutil/url-name.hh create mode 100644 tests/unit/libutil/url-name.cc diff --git a/src/libutil/meson.build b/src/libutil/meson.build index 069798a6f..8caa0532a 100644 --- a/src/libutil/meson.build +++ b/src/libutil/meson.build @@ -31,6 +31,7 @@ libutil_sources = files( 'tarfile.cc', 'thread-pool.cc', 'url.cc', + 'url-name.cc', 'util.cc', 'xml-writer.cc', ) @@ -92,6 +93,7 @@ libutil_headers = files( 'topo-sort.hh', 'types.hh', 'url-parts.hh', + 'url-name.hh', 'url.hh', 'util.hh', 'variant-wrapper.hh', diff --git a/src/libutil/url-name.cc b/src/libutil/url-name.cc new file mode 100644 index 000000000..6ef58c80a --- /dev/null +++ b/src/libutil/url-name.cc @@ -0,0 +1,59 @@ +#include +#include + +#include "url-name.hh" + +namespace nix { + +static std::string const attributeNamePattern("[a-z0-9_-]+"); +static std::regex const lastAttributeRegex("(?:" + attributeNamePattern + "\\.)*(?!default)(" + attributeNamePattern +")(\\^.*)?"); +static std::string const pathSegmentPattern("[a-zA-Z0-9_-]+"); +static std::regex const lastPathSegmentRegex(".*/(" + pathSegmentPattern +")"); +static std::regex const secondPathSegmentRegex("(?:" + pathSegmentPattern + ")/(" + pathSegmentPattern +")(?:/.*)?"); +static std::regex const gitProviderRegex("github|gitlab|sourcehut"); +static std::regex const gitSchemeRegex("git($|\\+.*)"); +static std::regex const defaultOutputRegex(".*\\.default($|\\^.*)"); + +std::optional getNameFromURL(ParsedURL const & url) +{ + std::smatch match; + + /* If there is a dir= argument, use its value */ + if (url.query.count("dir") > 0) { + return url.query.at("dir"); + } + + /* If the fragment isn't a "default" and contains two attribute elements, use the last one */ + if (std::regex_match(url.fragment, match, lastAttributeRegex)) { + return match.str(1); + } + + /* If this is a github/gitlab/sourcehut flake, use the repo name */ + if ( + std::regex_match(url.scheme, gitProviderRegex) + && std::regex_match(url.path, match, secondPathSegmentRegex) + ) { + return match.str(1); + } + + /* If it is a regular git flake, use the directory name */ + if ( + std::regex_match(url.scheme, gitSchemeRegex) + && std::regex_match(url.path, match, lastPathSegmentRegex) + ) { + return match.str(1); + } + + /* If everything failed but there is a non-default fragment, use it in full */ + if (!url.fragment.empty() && !std::regex_match(url.fragment, defaultOutputRegex)) + return url.fragment; + + /* If there is no fragment, take the last element of the path */ + if (std::regex_match(url.path, match, lastPathSegmentRegex)) + return match.str(1); + + /* If even that didn't work, the URL does not contain enough info to determine a useful name */ + return {}; +} + +} diff --git a/src/libutil/url-name.hh b/src/libutil/url-name.hh new file mode 100644 index 000000000..3a3f88e76 --- /dev/null +++ b/src/libutil/url-name.hh @@ -0,0 +1,26 @@ +#pragma once +///@file url-name.hh, for some hueristic-ish URL parsing. + +#include +#include + +#include "url.hh" +#include "url-parts.hh" +#include "util.hh" +#include "split.hh" + +namespace nix { + +/** + * Try to extract a reasonably unique and meaningful, human-readable + * name of a flake output from a parsed URL. + * When nullopt is returned, the callsite should use information available + * to it outside of the URL to determine a useful name. + * This is a heuristic approach intended for user interfaces. + * @return nullopt if the extracted name is not useful to identify a + * flake output, for example because it is empty or "default". + * Otherwise returns the extracted name. + */ +std::optional getNameFromURL(ParsedURL const & url); + +} diff --git a/src/libutil/url-parts.hh b/src/libutil/url-parts.hh index 6255c1d02..6efcc7e50 100644 --- a/src/libutil/url-parts.hh +++ b/src/libutil/url-parts.hh @@ -19,6 +19,7 @@ const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncod const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?"; const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])"; const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*"; +const static std::string fragmentRegex = "(?:" + pcharRegex + "|[/? \"^])*"; const static std::string segmentRegex = "(?:" + pcharRegex + "*)"; const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)"; const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)"; diff --git a/src/libutil/url.cc b/src/libutil/url.cc index a8f7d39fd..afccc4245 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -16,7 +16,7 @@ ParsedURL parseURL(const std::string & url) "((" + schemeRegex + "):" + "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))" + "(?:\\?(" + queryRegex + "))?" - + "(?:#(" + queryRegex + "))?", + + "(?:#(" + fragmentRegex + "))?", std::regex::ECMAScript); std::smatch match; diff --git a/tests/unit/libutil/url-name.cc b/tests/unit/libutil/url-name.cc new file mode 100644 index 000000000..f637efa89 --- /dev/null +++ b/tests/unit/libutil/url-name.cc @@ -0,0 +1,67 @@ +#include "url-name.hh" +#include + +namespace nix { + +/* ----------- tests for url-name.hh --------------------------------------------------*/ + + TEST(getNameFromURL, getsNameFromURL) { + ASSERT_EQ(getNameFromURL(parseURL("path:/home/user/project")), "project"); + ASSERT_EQ(getNameFromURL(parseURL("path:~/repos/nixpkgs#packages.x86_64-linux.hello")), "hello"); + ASSERT_EQ(getNameFromURL(parseURL("path:.#nonStandardAttr.mylaptop")), "nonStandardAttr.mylaptop"); + ASSERT_EQ(getNameFromURL(parseURL("path:./repos/myflake#nonStandardAttr.mylaptop")), "nonStandardAttr.mylaptop"); + ASSERT_EQ(getNameFromURL(parseURL("path:./nixpkgs#packages.x86_64-linux.complex^bin,man")), "complex"); + ASSERT_EQ(getNameFromURL(parseURL("path:./myproj#packages.x86_64-linux.default^*")), "myproj"); + + ASSERT_EQ(getNameFromURL(parseURL("github:NixOS/nixpkgs#packages.x86_64-linux.hello")), "hello"); + ASSERT_EQ(getNameFromURL(parseURL("github:NixOS/nixpkgs#hello")), "hello"); + ASSERT_EQ(getNameFromURL(parseURL("github:NixOS/nix#packages.x86_64-linux.default")), "nix"); + ASSERT_EQ(getNameFromURL(parseURL("github:NixOS/nix#")), "nix"); + ASSERT_EQ(getNameFromURL(parseURL("github:NixOS/nix")), "nix"); + ASSERT_EQ(getNameFromURL(parseURL("github:cachix/devenv/main#packages.x86_64-linux.default")), "devenv"); + ASSERT_EQ(getNameFromURL(parseURL("github:edolstra/nix-warez?rev=1234&dir=blender&ref=master")), "blender"); + + ASSERT_EQ(getNameFromURL(parseURL("gitlab:NixOS/nixpkgs#packages.x86_64-linux.hello")), "hello"); + ASSERT_EQ(getNameFromURL(parseURL("gitlab:NixOS/nixpkgs#hello")), "hello"); + ASSERT_EQ(getNameFromURL(parseURL("gitlab:NixOS/nix#packages.x86_64-linux.default")), "nix"); + ASSERT_EQ(getNameFromURL(parseURL("gitlab:NixOS/nix#")), "nix"); + ASSERT_EQ(getNameFromURL(parseURL("gitlab:NixOS/nix")), "nix"); + ASSERT_EQ(getNameFromURL(parseURL("gitlab:cachix/devenv/main#packages.x86_64-linux.default")), "devenv"); + + ASSERT_EQ(getNameFromURL(parseURL("sourcehut:NixOS/nixpkgs#packages.x86_64-linux.hello")), "hello"); + ASSERT_EQ(getNameFromURL(parseURL("sourcehut:NixOS/nixpkgs#hello")), "hello"); + ASSERT_EQ(getNameFromURL(parseURL("sourcehut:NixOS/nix#packages.x86_64-linux.default")), "nix"); + ASSERT_EQ(getNameFromURL(parseURL("sourcehut:NixOS/nix#")), "nix"); + ASSERT_EQ(getNameFromURL(parseURL("sourcehut:NixOS/nix")), "nix"); + ASSERT_EQ(getNameFromURL(parseURL("sourcehut:cachix/devenv/main#packages.x86_64-linux.default")), "devenv"); + + ASSERT_EQ(getNameFromURL(parseURL("git://github.com/edolstra/dwarffs")), "dwarffs"); + ASSERT_EQ(getNameFromURL(parseURL("git://github.com/edolstra/nix-warez?dir=blender")), "blender"); + ASSERT_EQ(getNameFromURL(parseURL("git+file:///home/user/project")), "project"); + ASSERT_EQ(getNameFromURL(parseURL("git+file:///home/user/project?ref=fa1e2d23a22")), "project"); + ASSERT_EQ(getNameFromURL(parseURL("git+ssh://git@github.com/someuser/my-repo#")), "my-repo"); + ASSERT_EQ(getNameFromURL(parseURL("git+git://github.com/someuser/my-repo?rev=v1.2.3")), "my-repo"); + ASSERT_EQ(getNameFromURL(parseURL("git+ssh:///home/user/project?dir=subproject&rev=v2.4")), "subproject"); + ASSERT_EQ(getNameFromURL(parseURL("git+http://not-even-real#packages.x86_64-linux.hello")), "hello"); + ASSERT_EQ(getNameFromURL(parseURL("git+https://not-even-real#packages.aarch64-darwin.hello")), "hello"); + + ASSERT_EQ(getNameFromURL(parseURL("tarball+http://github.com/NixOS/nix/archive/refs/tags/2.18.1#packages.x86_64-linux.jq")), "jq"); + ASSERT_EQ(getNameFromURL(parseURL("tarball+https://github.com/NixOS/nix/archive/refs/tags/2.18.1#packages.x86_64-linux.hg")), "hg"); + ASSERT_EQ(getNameFromURL(parseURL("tarball+file:///home/user/Downloads/nixpkgs-2.18.1#packages.aarch64-darwin.ripgrep")), "ripgrep"); + + ASSERT_EQ(getNameFromURL(parseURL("https://github.com/NixOS/nix/archive/refs/tags/2.18.1.tar.gz#packages.x86_64-linux.pv")), "pv"); + ASSERT_EQ(getNameFromURL(parseURL("http://github.com/NixOS/nix/archive/refs/tags/2.18.1.tar.gz#packages.x86_64-linux.pv")), "pv"); + + ASSERT_EQ(getNameFromURL(parseURL("file:///home/user/project?ref=fa1e2d23a22")), "project"); + ASSERT_EQ(getNameFromURL(parseURL("file+file:///home/user/project?ref=fa1e2d23a22")), "project"); + ASSERT_EQ(getNameFromURL(parseURL("file+http://not-even-real#packages.x86_64-linux.hello")), "hello"); + ASSERT_EQ(getNameFromURL(parseURL("file+http://gitfantasy.com/org/user/notaflake")), "notaflake"); + ASSERT_EQ(getNameFromURL(parseURL("file+https://not-even-real#packages.aarch64-darwin.hello")), "hello"); + + ASSERT_EQ(getNameFromURL(parseURL("https://www.github.com/")), std::nullopt); + ASSERT_EQ(getNameFromURL(parseURL("path:.")), std::nullopt); + ASSERT_EQ(getNameFromURL(parseURL("file:.#")), std::nullopt); + ASSERT_EQ(getNameFromURL(parseURL("path:.#packages.x86_64-linux.default")), std::nullopt); + ASSERT_EQ(getNameFromURL(parseURL("path:.#packages.x86_64-linux.default^*")), std::nullopt); + } +} diff --git a/tests/unit/meson.build b/tests/unit/meson.build index ae850df47..339ac9a4a 100644 --- a/tests/unit/meson.build +++ b/tests/unit/meson.build @@ -52,6 +52,7 @@ libutil_tests_sources = files( 'libutil/suggestions.cc', 'libutil/tests.cc', 'libutil/url.cc', + 'libutil/url-name.cc', 'libutil/xml-writer.cc', )