libutil: Add support for Rust

Add basic support for building and linking Rust into libutil.
This also includes a basic test to show that the linking is successful.
This test should be removed once a more practical use for Rust has
been found, as testing that code would necessarily require linking to work.

--- 👻 jade haunting section 👻 ---

This uses a very cursed approach to ensure that static builds do not
invoke undefined behaviour caused by linking libstd and friends multiple
times. That is, for static targets, we just statically link the whole
thing into the executable, and for dynamic targets we dynamically link
all the Rust stuff.

Reference re this being ostensibly illegal: https://github.com/rust-lang/rust/issues/44322

Even if it does not cause linker errors, it is not a *good idea* to link
a Rust staticlib containing libstd into multiple C++ dylibs to be loaded
into the same executable, since it is highly unclear whether libstd
globals would be correctly shared (and stuff like -Bdynamic ever getting
into link args can absolutely murder you by changing *intra-dylib*
references to not indirect through the PLT (and thus ignore any other
loaded dylib containing the symbol) underneath your nose).

This means that a solution of liblixutil_rs, liblixcmd_rs, etc, that are
statically linked into liblixutil, liblixcmd, etc *is not safe*.

Effectively `libstd` *must* be in its own dylib in an environment
containing dynamic linking of multiple bits of Rust code.

The reason that we shouldn't just jam all the Rust in one staticlib for
shared targets as well (though I::jade can still be convinced we
*should* do it for those), is that we would have to build a
liblixrust.so that depends on *every* other Lix library, **and** every
other Lix library depends on it, exploding our dylib hierarchy
completely to uselessness.

Building a libfirefoxrust.a and then linking it in *is* what Firefox
does, but it does not work for us since our system is not one big
library/etc. It would probably have perf benefits, but so would getting
rid of dynamic linking completely.

Meson bugs encountered (for github xref to find):
Meson does not set the soname for us: https://github.com/mesonbuild/meson/issues/13537
Meson ignores link_args for Rust targets: https://github.com/mesonbuild/meson/issues/13538

Co-authored-by: Qyriad <qyriad@qyriad.me>
Co-authored-by: Jade Lovelace <lix@jade.fyi>

Change-Id: Ide390b1d2635fd0a80f12f1de992003b9dc7dfce
This commit is contained in:
Artemis Tosini 2024-05-10 02:47:21 +00:00 committed by Jade Lovelace
parent af696cfb92
commit 19de2b137f
17 changed files with 239 additions and 61 deletions

4
Cargo.lock generated
View file

@ -45,6 +45,10 @@ dependencies = [
"rowan",
]
[[package]]
name = "lixutil-rs"
version = "0.0.0"
[[package]]
name = "memoffset"
version = "0.9.1"

View file

@ -1,6 +1,6 @@
[workspace]
resolver = "2"
members = ["src/lix-doc"]
members = ["src/lix-doc", "src/libutil"]
[workspace.package]
edition = "2021"

View file

@ -340,6 +340,7 @@ endif
#
coreutils = find_program('coreutils', native : true)
dot = find_program('dot', required : false, native : true)
cbindgen = find_program('cbindgen', native : true)
pymod = import('python')
python = pymod.find_installation('python3')
@ -572,6 +573,14 @@ run_command(
check : true,
)
if is_static
# Generate rlibs to link into rust-staticlib-hack
inner_rust_crates_abi = 'rust'
else
# Generate cdylibs, don't use rust-staticlib-hack
inner_rust_crates_abi = 'c'
endif
if is_darwin
configure_file(
input : 'misc/launchd/org.nixos.nix-daemon.plist.in',

View file

@ -42,6 +42,7 @@
python3,
rapidcheck,
rustPlatform,
rust-cbindgen,
rustc,
sqlite,
toml11,
@ -219,6 +220,7 @@ stdenv.mkDerivation (finalAttrs: {
meson
ninja
cmake
rust-cbindgen
rustc
]
++ [

12
src/libutil/Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
# Note: this is not really intended to actually do anything with Cargo, it
# exists to make Cargo manage dependencies in a workspace across lix-doc and
# liblixrust
#
# It is highly likely it will not actually pass build, especially as the build
# system gets more anti-Cargo.
[package]
name = "lixutil-rs"
edition.workspace = true
[lib]
path = "lib.rs"

View file

@ -0,0 +1,3 @@
language = "C++"
namespace = "nix"

12
src/libutil/lib.rs Normal file
View file

@ -0,0 +1,12 @@
/// Items exported across the C ABI for use from the C++ side of libutil.
pub mod exports {
    /// Operands for [`test_multiply`], passed by value across the C ABI.
    #[repr(C)]
    #[derive(Debug, Clone, Copy)]
    pub struct TestMultiplyArgs {
        /// Left-hand factor.
        pub a: u64,
        /// Right-hand factor.
        pub b: u64,
    }

    /// Multiplies the two factors in `args` and returns the product.
    ///
    /// The product wraps modulo 2^64 on overflow. A plain `*` would panic in
    /// builds with overflow checks enabled, and a panic cannot be reported to
    /// a caller on the other side of an `extern "C"` boundary, so the wrapping
    /// behaviour is made explicit and identical in every build profile.
    #[no_mangle]
    pub extern "C" fn test_multiply(args: TestMultiplyArgs) -> u64 {
        args.a.wrapping_mul(args.b)
    }
}

View file

@ -1,3 +1,21 @@
# Generate the C++ header for the Rust exports by running cbindgen with
# cbindgen.toml as its config (@INPUT0@) over the Rust sources (@INPUT1@).
libutil_rs_h = custom_target(
'libutil-rs.hh',
input : [files('cbindgen.toml'), libutil_rs_sources],
output : ['libutil-rs.hh'],
command : [cbindgen, '--config', '@INPUT0@', '--output', '@OUTPUT0@', '--', '@INPUT1@']
)
# Extra linker arguments handed to consumers of the Rust code. This list is
# currently always empty: the only addition below is commented out.
libutil_rs_extra_link_args = []
if not is_static
# meson does not understand that "link with" for cdylibs means to "link with it". i do not know what is up with that.
# libutil_rs_extra_link_args += libutil_rs_mstatic.full_path()
endif
# Dependency object bundling the Rust library to link against, the extra link
# arguments above, and the generated header as a source.
libutil_rs_dep = declare_dependency(
link_with : libutil_rs_mstatic,
link_args : libutil_rs_extra_link_args,
sources : [libutil_rs_h],
)
libutil_sources = files(
'archive.cc',
'args.cc',
@ -127,6 +145,7 @@ libutil = library(
'lixutil',
libutil_sources,
dependencies : [
libutil_rs_dep,
aws_sdk,
aws_s3,
boehm,

View file

@ -13,21 +13,26 @@ subproject(rowan_name, default_options : ['werror=false'])
rnix = dependency(rnix_name)
rowan = dependency(rowan_name)
lix_doc = static_library(
lix_doc = library(
'lix_doc',
sources : files('src/lib.rs'),
rust_abi : 'c',
rust_abi : inner_rust_crates_abi,
dependencies : [
rowan,
rnix,
],
rust_args : [
# TODO: do not merge with this as-is, this needs some ending generation and needs to be conditional on actually being dynamic
# This hack is required by the wombo combo of meson bugs:
# Meson does not set the soname for us: https://github.com/mesonbuild/meson/issues/13537
# Meson ignores link_args for Rust targets: https://github.com/mesonbuild/meson/issues/13538
'-Clink-arg=-Wl,-soname,liblix_doc.so',
],
# If an installed static library depends on this target, then Meson will force
# that to link with `-Wl,--whole-archive`, unless we also install this target.
# `-Wl,--whole-archive` can cause some Problems when linking multiple nested
# static libraries, so let's just install the damn thing.
install : true,
)
liblix_doc = declare_dependency(
link_with : lix_doc,
# FIXME(Qyriad): is this right?
install_rpath : libdir,
)

View file

@ -8,9 +8,8 @@ pub mod pprint;
use crate::pprint::pprint_args;
use rnix::ast::{self, Lambda};
use rnix::{NodeOrToken, SyntaxKind};
use rnix::SyntaxNode;
use rnix::{NodeOrToken, SyntaxKind};
// Needed because rnix fucked up and didn't reexport this, oops.
use rowan::ast::AstNode;
@ -294,10 +293,8 @@ pub fn get_function_docs(filename: &str, line: usize, col: usize) -> Option<Stri
}
// There is literally always a lambda or something has gone very very wrong.
let lambda =
ast::Lambda::cast(
lambda.expect("no lambda found; what.")
) .expect("not a rnix::ast::Lambda; what.");
let lambda = ast::Lambda::cast(lambda.expect("no lambda found; what."))
.expect("not a rnix::ast::Lambda; what.");
// Search up, hopefully to find the binding so we can get the identifier name.
// TODO: Just provide this directly from the C++ code to make it possible to always have the correct identifier.
@ -347,27 +344,26 @@ fn find_comment(node: &SyntaxNode) -> Option<String> {
// Consume up to one whitespace token before the first comment. There might not always be
// whitespace such as the (rather unusual) case of `/* meow */x = a: 3`.
if matches!(it.peek(), Some(NodeOrToken::Token(token)) if token.kind() == SyntaxKind::TOKEN_WHITESPACE) {
if matches!(it.peek(), Some(NodeOrToken::Token(token)) if token.kind() == SyntaxKind::TOKEN_WHITESPACE)
{
it.next();
}
let comments = it.map_while(|element| match element {
NodeOrToken::Token(token) => {
match token.kind() {
// Map the tokens we're interested in to our internal token type.
SyntaxKind::TOKEN_COMMENT => Some(DocToken::Comment(token.text().to_owned())),
SyntaxKind::TOKEN_WHITESPACE => {
Some(DocToken::Whitespace(token.text().to_owned()))
}
// If we hit a different token type, we know we've gone past relevant comments
// and should stop.
_ => None,
}
NodeOrToken::Token(token) => {
match token.kind() {
// Map the tokens we're interested in to our internal token type.
SyntaxKind::TOKEN_COMMENT => Some(DocToken::Comment(token.text().to_owned())),
SyntaxKind::TOKEN_WHITESPACE => Some(DocToken::Whitespace(token.text().to_owned())),
// If we hit a different token type, we know we've gone past relevant comments
// and should stop.
_ => None,
}
// If we hit a node entry we've definitely gone past comments that would be related to
// this node and we should retreat.
_ => None,
});
}
// If we hit a node entry we've definitely gone past comments that would be related to
// this node and we should retreat.
_ => None,
});
// For the curious, `into_iter()` here consumes the binding producing an owned value allowing us to avoid
// making the original binding mutable, we don't reuse it later so this is a cute way to handle it, though
@ -375,39 +371,43 @@ fn find_comment(node: &SyntaxNode) -> Option<String> {
Some(cleanup_comments(&mut comments.into_iter())).filter(|c| !c.is_empty())
}
/// Get the docs for a function in the given file path at the given file position and return it as
/// a C string pointer
///
/// Returns a null pointer when `filename` is not valid UTF-8, when
/// `get_function_docs` panics or yields `None`, or when the resulting text
/// cannot be converted into a `CString`. A non-null result is produced by
/// `CString::into_raw`, so ownership passes to the caller.
#[no_mangle]
pub extern "C" fn lixdoc_get_function_docs(
filename: *const c_char,
line: usize,
col: usize,
) -> *const c_char {
// SAFETY: relies on the caller passing a valid, NUL-terminated C string;
// a null `filename` is not checked for here.
let fname = unsafe { CStr::from_ptr(filename) };
fname
.to_str()
.ok()
.and_then(|f| {
// Catch panics so they are logged and turned into a null return
// instead of unwinding out of this `extern "C"` function.
panic::catch_unwind(|| get_function_docs(f, line, col))
.map_err(|e| {
eprintln!("panic!! {:#?}", e);
e
})
.ok()
})
.flatten()
.and_then(|s| CString::new(s).ok())
.map(|s| s.into_raw() as *const c_char)
.unwrap_or(ptr::null())
}
pub mod exports {
use super::*;
/// Call this to free a string from `lixdoc_get_function_docs`.
#[no_mangle]
pub extern "C" fn lixdoc_free_string(s: *const c_char) {
unsafe {
// cast note: this cast is turning something that was cast to const
// back to mut
drop(CString::from_raw(s as *mut c_char));
/// Get the docs for a function in the given file path at the given file position and return it as
/// a C string pointer
///
/// Returns a null pointer when `filename` is null or not valid UTF-8, when
/// `get_function_docs` panics or yields `None`, or when the resulting text
/// cannot be converted into a `CString`.
///
/// A non-null result is produced by `CString::into_raw`; release it with
/// `lixdoc_free_string`.
///
/// # Safety
///
/// A non-null `filename` must point to a valid, NUL-terminated C string that
/// stays alive for the duration of the call.
#[no_mangle]
pub extern "C" fn lixdoc_get_function_docs(
    filename: *const c_char,
    line: usize,
    col: usize,
) -> *const c_char {
    // `CStr::from_ptr` on a null pointer is undefined behaviour, so report a
    // null filename the same way as every other failure: with a null result.
    if filename.is_null() {
        return ptr::null();
    }

    // SAFETY: `filename` is non-null (checked above); the caller guarantees it
    // points to a valid NUL-terminated string.
    let fname = unsafe { CStr::from_ptr(filename) };
    fname
        .to_str()
        .ok()
        .and_then(|f| {
            // Catch panics so they are logged and turned into a null return
            // instead of unwinding out of this `extern "C"` function.
            panic::catch_unwind(|| get_function_docs(f, line, col))
                .map_err(|e| {
                    eprintln!("panic!! {:#?}", e);
                    e
                })
                .ok()
        })
        .flatten()
        .and_then(|s| CString::new(s).ok())
        .map(|s| s.into_raw() as *const c_char)
        .unwrap_or(ptr::null())
}
/// Call this to free a string from `lixdoc_get_function_docs`.
///
/// Passing a null pointer is a no-op, so the result of
/// `lixdoc_get_function_docs` can be handed back unconditionally.
///
/// # Safety
///
/// A non-null `s` must have been returned by `lixdoc_get_function_docs` and
/// must not have been freed already.
#[no_mangle]
pub extern "C" fn lixdoc_free_string(s: *const c_char) {
    // `lixdoc_get_function_docs` signals failure with null, and
    // `CString::from_raw` on a null pointer is undefined behaviour.
    if s.is_null() {
        return;
    }

    // SAFETY: `s` is non-null (checked above) and, per the contract, came from
    // `CString::into_raw`, so reconstructing the `CString` is sound.
    unsafe {
        // cast note: this cast is turning something that was cast to const
        // back to mut
        drop(CString::from_raw(s as *mut c_char));
    }
}
}

View file

@ -1,6 +1,47 @@
# Subcomponents: these link into artifacts themselves, and have interdependencies.
subdir('lix-doc')
# See rust-staticlib-hack/README.md for a description of why this is such madness.
# In short it's because as far as I know, it is only sound to link one Rust
# staticlib into an executable at a time, so we have to comply with that.
# Rust sources of the lixutil_rs crate.
libutil_rs_sources = files(
'libutil/lib.rs'
)
# The Rust half of libutil. Its rust_abi follows inner_rust_crates_abi:
# 'rust' when is_static is set, 'c' otherwise.
libutil_rs = library(
'lixutil_rs',
rust_abi : inner_rust_crates_abi,
sources : libutil_rs_sources,
rust_args : [
# TODO: do not merge with this as-is, this needs some ending generation and needs to be conditional on actually being dynamic
# This hack is required by the wombo combo of meson bugs:
# Meson does not set the soname for us: https://github.com/mesonbuild/meson/issues/13537
# Meson ignores link_args for Rust targets: https://github.com/mesonbuild/meson/issues/13538
'-Clink-arg=-Wl,-soname,liblixutil_rs.so',
],
# If an installed static library depends on this target, then Meson will force
# that to link with `-Wl,--whole-archive`, unless we also install this target.
# `-Wl,--whole-archive` can cause some Problems when linking multiple nested
# static libraries, so let's just install the damn thing.
install : true,
# FIXME(Qyriad): is this right?
install_rpath : libdir,
)
# Pick what C++ targets actually link against: in static builds both Rust
# crates resolve to the single rust_staticlib_hack archive, otherwise each
# crate's own library is used directly.
if is_static
subdir('rust-staticlib-hack')
lix_doc_mstatic = rust_staticlib_hack
libutil_rs_mstatic = rust_staticlib_hack
else
lix_doc_mstatic = lix_doc
libutil_rs_mstatic = libutil_rs
endif
# needs to be declared here, since only here do we have the staticlib hack resolved
liblix_doc = declare_dependency(
link_with : lix_doc_mstatic
)
subdir('libutil')
# Load-bearing order. libstore depends on libutil.
subdir('libstore')

View file

@ -0,0 +1,13 @@
# As with libutil, this is not guaranteed to actually build with Cargo, but exists to
# manage lock files.
[package]
name = "rust-staticlib-hack"
edition.workspace = true
[lib]
crate-type = ["staticlib"]
path = "lib.rs"
[dependencies]
lixutil-rs = { path = "../libutil" }
lix-doc = { path = "../lix-doc" }

View file

@ -0,0 +1,13 @@
# rust staticlib hack
Rust needs to be defined first in Meson since it may need to be linked into one big staticlib blob which has references resolved Later, if we are building in static mode.
In dynamic mode this is all fine because you are allowed to have multiple Rust cdylibs, at least with -C prefer-dynamic, AFAIK.
This big hack is because Rust is not officially allowed to have multiple Rust staticlibs linked into one executable, and there is no way to build a staticlib and *link libstd separately*, so you just rely on the linker not exploding if duplicate symbols occur, or on the linker actually getting multiple instances of libstd globals deduplicated correctly and..... ewwwww UB.
See: https://github.com/rust-lang/rust/issues/44322
The architecture of the Lix Rust linking is that:
- In shared builds, the dependencies follow the C++ library dependency tree
- In static builds, rust-staticlib-hack is depended upon by every library and
ultimately winds up as the sole rust staticlib linked into the executables.

View file

@ -0,0 +1,13 @@
//! This is a hack that forces Rust to link all the Lix libs as a single static
//! library for usage when building Lix itself in static mode.
//!
//! The reason for this is that Rust does not support linking multiple
//! staticlibs into one executable, as it will jam a libstd into every single
//! one of them. This is ridiculously goofy because it would be trivially
//! solved by linking libstd separately.
//!
//! https://github.com/rust-lang/rust/issues/44322
//!
//! It re-exports all the symbols that should be exported to C++.
pub use lix_doc::exports::*;
pub use lixutil_rs::exports::*;

View file

@ -0,0 +1,16 @@
# This file is only sometimes used! See README.md in this directory.

rust_staticlib_hack_sources = files('lib.rs')

# One C-ABI static archive that pulls in every inner Rust crate.
rust_staticlib_hack = static_library(
  'rust_staticlib_hack',
  sources : rust_staticlib_hack_sources,
  rust_abi : 'c',
  # libutil_rs and friends must be rlibs in the case of static builds
  link_with : [libutil_rs, lix_doc],
)
)

View file

@ -0,0 +1,11 @@
#include "libutil-rs.hh"
#include <gtest/gtest.h>
namespace nix {

// Smoke test: calling into the Rust-exported test_multiply() proves that the
// Rust code was built and linked into the test executable.
TEST(RustLink, BasicLink)
{
    const TestMultiplyArgs factors{.a = 2, .b = 3};
    const auto product = test_multiply(factors);
    ASSERT_EQ(product, 6);
}

}

View file

@ -56,6 +56,7 @@ libutil_tests_sources = files(
'libutil/pool.cc',
'libutil/references.cc',
'libutil/serialise.cc',
'libutil/rust-link.cc',
'libutil/suggestions.cc',
'libutil/tests.cc',
'libutil/url.cc',
@ -72,6 +73,10 @@ libutil_tester = executable(
gtest,
boehm,
liblixutil,
# cannot link to something transitively included and ELF does not support
# symbol reexports unlike every other major executable format.
# This is just required for the one test checking the Rust linking works.
libutil_rs_dep,
liblixexpr_mstatic,
liblixutil_test_support,
nlohmann_json,