lix-doc: update dependencies and improve

Change-Id: I82119cf433dffd2864e76f24d00d876032f1b1a5

keep track of local changes

Change-Id: I1c383c70ef5af6fa9ac51b0e7703f0de7d5ba92d

progress commit

Change-Id: Ic1fa8883ff5adbde13ea5bd987486b463b70d8fd

it works!

Change-Id: I010ece3184fdb029047935bfdc8906e3fda04354

remove old unneeded code

Change-Id: Ibb5196281bea5ab700c650b0533738f987385f96

small cleanups

Change-Id: Ia3062045e0645026d7ad888d5f84f2436c6e399a
This commit is contained in:
Lunaphied 2024-06-18 15:39:55 -06:00
parent 79121e0c44
commit 40be935c74
17 changed files with 804 additions and 554 deletions

1
.envrc
View file

@ -7,3 +7,4 @@ if [[ -n "$NIX_BUILD_CORES" ]]; then
export MAKEFLAGS="$MAKEFLAGS -j $NIX_BUILD_CORES"
fi
export GTEST_BRIEF=1
export NIX_LDFLAGS="$NIX_LDFLAGS -L/home/lunaphied/code/lix/lix-doc/target/debug"

View file

@ -54,7 +54,7 @@
(Run `touch .nocontribmsg` to hide this message.)
'';
officialRelease = false;
officialRelease = true;
# Set to true to build the release notes for the next release.
buildUnreleasedNotes = true;

6
lix-doc/.gitignore vendored
View file

@ -1,6 +0,0 @@
# SPDX-FileCopyrightText: 2024 Jade Lovelace
#
# SPDX-License-Identifier: BSD-2-Clause OR MIT
/target
result

161
lix-doc/Cargo.lock generated
View file

@ -1,161 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "cbitset"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29b6ad25ae296159fb0da12b970b2fe179b234584d7cd294c891e2bbb284466b"
dependencies = [
"num-traits",
]
[[package]]
name = "dissimilar"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86e3bdc80eee6e16b2b6b0f87fbc98c04bee3455e35174c0de1a125d0688c632"
[[package]]
name = "expect-test"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30d9eafeadd538e68fb28016364c9732d78e420b9ff8853fa5e4058861e9f8d3"
dependencies = [
"dissimilar",
"once_cell",
]
[[package]]
name = "lix-doc"
version = "0.0.1"
dependencies = [
"expect-test",
"rnix",
]
[[package]]
name = "num-traits"
version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "proc-macro2"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rnix"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a9b645f0edba447dbfc6473dd22999f46a1d00ab39e777a2713a1cf34a1597b"
dependencies = [
"cbitset",
"rowan",
]
[[package]]
name = "rowan"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ea7cadf87a9d8432e85cb4eb86bd2e765ace60c24ef86e79084dcae5d1c5a19"
dependencies = [
"rustc-hash",
"smol_str",
"text_unit",
"thin-dst",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "serde"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "smol_str"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fad6c857cbab2627dcf01ec85a623ca4e7dcb5691cbaa3d7fb7653671f0d09c9"
dependencies = [
"serde",
]
[[package]]
name = "syn"
version = "2.0.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "text_unit"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20431e104bfecc1a40872578dbc390e10290a0e9c35fffe3ce6f73c15a9dbfc2"
[[package]]
name = "thin-dst"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db3c46be180f1af9673ebb27bc1235396f61ef6965b3fe0dbb2e624deb604f0e"
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

View file

@ -1,338 +0,0 @@
// SPDX-FileCopyrightText: 2024 Jade Lovelace
//
// SPDX-License-Identifier: BSD-2-Clause OR MIT
//! library components of nix-doc
pub mod pprint;
use crate::pprint::pprint_args;
use rnix::types::{Lambda, TypedNode};
use rnix::SyntaxKind::*;
use rnix::{NodeOrToken, SyntaxNode, TextUnit, WalkEvent};
use std::ffi::{CStr, CString};
use std::fs;
use std::iter;
use std::os::raw::c_char;
use std::panic;
use std::ptr;
use std::{fmt::Display, str};
pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
const DOC_INDENT: usize = 3;
struct SearchResult {
/// Name of the function
identifier: String,
/// Dedented documentation comments
doc: String,
/// Parameter block for the function
param_block: String,
}
fn find_pos(file: &str, line: usize, col: usize) -> usize {
let mut lines = 1;
let mut line_start = 0;
let mut it = file.chars().enumerate().peekable();
while let Some((count, ch)) = it.next() {
if ch == '\n' || ch == '\r' {
lines += 1;
let addend = if ch == '\r' && it.peek().map(|x| x.1) == Some('\n') {
it.next();
1
} else {
0
};
line_start = count + addend;
}
let col_diff = ((count as i32) - (line_start as i32)).abs() as usize;
if lines == line && col_diff == col {
return count;
}
}
unreachable!();
}
impl SearchResult {
fn format<P: Display>(&self, filename: P, line: usize) -> String {
format!(
"**Synopsis:** `{}` = {}\n\n{}\n\n# {}",
self.identifier.as_str(),
self.param_block,
indented(&self.doc, DOC_INDENT),
format!("{}:{}", filename, line).as_str(),
)
}
}
/// Emits a string `s` indented by `indent` spaces
fn indented(s: &str, indent: usize) -> String {
let indent_s = iter::repeat(' ').take(indent).collect::<String>();
s.split('\n')
.map(|line| indent_s.clone() + line)
.collect::<Vec<_>>()
.join("\n")
}
/// Cleans up a single line, erasing prefix single line comments but preserving indentation
fn cleanup_single_line<'a>(s: &'a str) -> &'a str {
let mut cmt_new_start = 0;
let mut iter = s.char_indices().peekable();
while let Some((idx, ch)) = iter.next() {
// peek at the next character, with an explicit '\n' as "next character" at end of line
let (_, next_ch) = iter.peek().unwrap_or(&(0, '\n'));
// if we find a character, save the byte position after it as our new string start
if ch == '#' || (ch == '*' && next_ch.is_whitespace()) {
cmt_new_start = idx + 1;
break;
}
// if, instead, we are on a line with no starting comment characters, leave it alone as it
// will be handled by dedent later
if !ch.is_whitespace() {
break;
}
}
&s[cmt_new_start..]
}
/// Erases indents in comments. This is *almost* a normal dedent function, but it starts by looking
/// at the second line if it can.
fn dedent_comment(s: &str) -> String {
let mut whitespaces = 0;
let mut lines = s.lines();
let first = lines.next();
// scan for whitespace
for line in lines.chain(first) {
let line_whitespace = line.chars().take_while(|ch| ch.is_whitespace()).count();
if line_whitespace != line.len() {
// a non-whitespace line, perfect for taking whitespace off of
whitespaces = line_whitespace;
break;
}
}
// maybe the first considered line we found was indented further, so let's look for more lines
// that might have a shorter indent. In the case of one line, do nothing.
for line in s.lines().skip(1) {
let line_whitespace = line.chars().take_while(|ch| ch.is_whitespace()).count();
if line_whitespace != line.len() {
whitespaces = line_whitespace.min(whitespaces);
}
}
// delete up to `whitespaces` whitespace characters from each line and reconstitute the string
let mut out = String::new();
for line in s.lines() {
let content_begin = line.find(|ch: char| !ch.is_whitespace()).unwrap_or(0);
out.push_str(&line[content_begin.min(whitespaces)..]);
out.push('\n');
}
out.truncate(out.trim_end_matches('\n').len());
out
}
/// Deletes whitespace and leading comment characters
///
/// Oversight we are choosing to ignore: if you put # characters at the beginning of lines in a
/// multiline comment, they will be deleted.
fn cleanup_comments<S: AsRef<str>, I: DoubleEndedIterator<Item = S>>(comment: &mut I) -> String {
dedent_comment(
&comment
.rev()
.map(|small_comment| {
small_comment
.as_ref()
// space before multiline start
.trim_start()
// multiline starts
.trim_start_matches("/*")
// trailing so we can grab multiline end
.trim_end()
// multiline ends
.trim_end_matches("*/")
// extra space that was in the multiline
.trim()
.split('\n')
// erase single line comments and such
.map(cleanup_single_line)
.collect::<Vec<_>>()
.join("\n")
})
.collect::<Vec<_>>()
.join("\n"),
)
}
/// Get the docs for a specific function
pub fn get_function_docs(filename: &str, line: usize, col: usize) -> Option<String> {
let content = fs::read(filename).ok()?;
let decoded = str::from_utf8(&content).ok()?;
let pos = find_pos(&decoded, line, col);
let rowan_pos = TextUnit::from_usize(pos);
let tree = rnix::parse(decoded);
let mut lambda = None;
for node in tree.node().preorder() {
match node {
WalkEvent::Enter(n) => {
if n.text_range().start() >= rowan_pos && n.kind() == NODE_LAMBDA {
lambda = Lambda::cast(n);
break;
}
}
WalkEvent::Leave(_) => (),
}
}
let lambda = lambda?;
let res = visit_lambda("func".to_string(), &lambda);
Some(res.format(filename, line))
}
fn visit_lambda(name: String, lambda: &Lambda) -> SearchResult {
// grab the arguments
let param_block = pprint_args(&lambda);
// find the doc comment
let comment = find_comment(lambda.node().clone()).unwrap_or_else(|| "".to_string());
SearchResult {
identifier: name,
doc: comment,
param_block,
}
}
fn find_comment(node: SyntaxNode) -> Option<String> {
let mut node = NodeOrToken::Node(node);
let mut comments = Vec::new();
loop {
loop {
if let Some(new) = node.prev_sibling_or_token() {
node = new;
break;
} else {
node = NodeOrToken::Node(node.parent()?);
}
}
match node.kind() {
TOKEN_COMMENT => match &node {
NodeOrToken::Token(token) => comments.push(token.text().clone()),
NodeOrToken::Node(_) => unreachable!(),
},
// This stuff is found as part of `the-fn = f: ...`
// here: ^^^^^^^^
NODE_KEY | TOKEN_ASSIGN => (),
t if t.is_trivia() => (),
_ => break,
}
}
let doc = cleanup_comments(&mut comments.iter().map(|c| c.as_str()));
Some(doc).filter(|it| !it.is_empty())
}
/// Get the docs for a function in the given file path at the given file position and return it as
/// a C string pointer
#[no_mangle]
pub extern "C" fn nd_get_function_docs(
filename: *const c_char,
line: usize,
col: usize,
) -> *const c_char {
let fname = unsafe { CStr::from_ptr(filename) };
fname
.to_str()
.ok()
.and_then(|f| {
panic::catch_unwind(|| get_function_docs(f, line, col))
.map_err(|e| {
eprintln!("panic!! {:#?}", e);
e
})
.ok()
})
.flatten()
.and_then(|s| CString::new(s).ok())
.map(|s| s.into_raw() as *const c_char)
.unwrap_or(ptr::null())
}
/// Call this to free a string from nd_get_function_docs
#[no_mangle]
pub extern "C" fn nd_free_string(s: *const c_char) {
unsafe {
// cast note: this cast is turning something that was cast to const
// back to mut
drop(CString::from_raw(s as *mut c_char));
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_bytepos() {
let fakefile = "abc\ndef\nghi";
assert_eq!(find_pos(fakefile, 2, 2), 5);
}
#[test]
fn test_bytepos_cursed() {
let fakefile = "abc\rdef\r\nghi";
assert_eq!(find_pos(fakefile, 2, 2), 5);
assert_eq!(find_pos(fakefile, 3, 2), 10);
}
#[test]
fn test_comment_stripping() {
let ex1 = ["/* blah blah blah\n foooo baaar\n blah */"];
assert_eq!(
cleanup_comments(&mut ex1.iter()),
"blah blah blah\n foooo baaar\nblah"
);
let ex2 = ["# a1", "# a2", "# aa"];
assert_eq!(cleanup_comments(&mut ex2.iter()), "aa\n a2\na1");
}
#[test]
fn test_dedent() {
let ex1 = "a\n b\n c\n d";
assert_eq!(dedent_comment(ex1), "a\nb\nc\n d");
let ex2 = "a\nb\nc";
assert_eq!(dedent_comment(ex2), ex2);
let ex3 = " a\n b\n\n c";
assert_eq!(dedent_comment(ex3), "a\nb\n\n c");
}
#[test]
fn test_single_line_comment_stripping() {
let ex1 = " * a";
let ex2 = " # a";
let ex3 = " a";
let ex4 = " *";
assert_eq!(cleanup_single_line(ex1), " a");
assert_eq!(cleanup_single_line(ex2), " a");
assert_eq!(cleanup_single_line(ex3), ex3);
assert_eq!(cleanup_single_line(ex4), "");
}
#[test]
fn test_single_line_retains_bold_headings() {
let ex1 = " **Foo**:";
assert_eq!(cleanup_single_line(ex1), ex1);
}
}

View file

@ -1,40 +0,0 @@
// SPDX-FileCopyrightText: 2024 Jade Lovelace
//
// SPDX-License-Identifier: BSD-2-Clause OR MIT
use rnix::types::{Lambda, TypedNode};
use rnix::SyntaxKind::*;
/// Pretty-prints the arguments to a function
pub fn pprint_args(lambda: &Lambda) -> String {
// TODO: handle docs directly on NODE_IDENT args (uncommon case)
let mut lambda = lambda.clone();
let mut out = String::new();
loop {
let arg = lambda.arg().unwrap();
match arg.kind() {
NODE_IDENT => {
out += &format!("*{}*", &arg.to_string());
out.push_str(": ");
let body = lambda.body().unwrap();
if body.kind() == NODE_LAMBDA {
lambda = Lambda::cast(body).unwrap();
} else {
break;
}
}
NODE_PATTERN => {
out += &format!("*{}*", &arg.to_string());
out.push_str(": ");
break;
}
t => {
unreachable!("unhandled arg type {:?}", t);
}
}
}
out.push_str("...");
out
//pprint_arg(lambda.arg());
}

View file

@ -286,7 +286,9 @@ deps += nlohmann_json
#
# FIXME: build this with meson in the future after we drop Make (with which we
# *absolutely* are not going to make it work)
lix_doc = declare_dependency(link_args : [ '-llix_doc' ])
lix_doc_project = subproject('lix-doc')
lix_doc = lix_doc_project.get_variable('lix_doc_dep')
#lix_doc = dependency('lix-doc-0.0.1-rs')
deps += lix_doc
#

View file

@ -38,6 +38,7 @@
pkg-config,
python3,
rapidcheck,
rustPlatform,
sqlite,
toml11,
util-linuxMinimal ? utillinuxMinimal,
@ -46,9 +47,6 @@
busybox-sandbox-shell,
# internal fork of nix-doc providing :doc in the repl
lix-doc ? __forDefaults.lix-doc,
pname ? "nix",
versionSuffix ? "",
officialRelease ? false,
@ -73,7 +71,6 @@
];
};
lix-doc = pkgs.callPackage ./lix-doc/package.nix { };
build-release-notes = pkgs.callPackage ./maintainers/build-release-notes.nix { };
},
}:
@ -211,6 +208,8 @@ stdenv.mkDerivation (finalAttrs: {
meson
ninja
cmake
rustPlatform.cargoSetupHook
rustPlatform.cargoBuildHook
]
++ [
(lib.getBin lowdown)
@ -244,7 +243,6 @@ stdenv.mkDerivation (finalAttrs: {
lowdown
libsodium
toml11
lix-doc
]
++ lib.optionals stdenv.hostPlatform.isLinux [
libseccomp
@ -419,6 +417,14 @@ stdenv.mkDerivation (finalAttrs: {
# Load-bearing order. Must come before clang-unwrapped below, but after clang_tools above.
stdenv.cc
]
++ [
pkgs.rust-analyzer
pkgs.cargo
pkgs.rustc
pkgs.rustfmt
pkgs.rustPlatform.rustLibSrc
pkgs.rustPlatform.rustcSrc
]
++ lib.optionals stdenv.cc.isClang [
# Required for clang-tidy checks.
llvmPackages.llvm

95
subprojects/lix-doc/Cargo.lock generated Normal file
View file

@ -0,0 +1,95 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "countme"
version = "3.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636"
[[package]]
name = "dissimilar"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86e3bdc80eee6e16b2b6b0f87fbc98c04bee3455e35174c0de1a125d0688c632"
[[package]]
name = "expect-test"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30d9eafeadd538e68fb28016364c9732d78e420b9ff8853fa5e4058861e9f8d3"
dependencies = [
"dissimilar",
"once_cell",
]
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
[[package]]
name = "lix-doc"
version = "0.0.1"
dependencies = [
"expect-test",
"rnix",
"rowan",
]
[[package]]
name = "memoffset"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "rnix"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb35cedbeb70e0ccabef2a31bcff0aebd114f19566086300b8f42c725fc2cb5f"
dependencies = [
"rowan",
]
[[package]]
name = "rowan"
version = "0.15.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a58fa8a7ccff2aec4f39cc45bf5f985cec7125ab271cf681c279fd00192b49"
dependencies = [
"countme",
"hashbrown",
"memoffset",
"rustc-hash",
"text-size",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "text-size"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233"

View file

@ -1,6 +1,6 @@
[package]
description = "Nix function documentation tool, stripped down into a library"
edition = "2018"
edition = "2021"
name = "lix-doc"
version = "0.0.1"
license = "BSD-2-Clause OR MIT"
@ -12,7 +12,9 @@ repository = "https://github.com/lf-/nix-doc"
crate_type = ["staticlib"]
[dependencies]
rnix = "0.8.0"
rnix = "0.11.0"
# Necessary because rnix fails to export a critical trait (Rowan's AstNode).
rowan = "0.15.0"
[dev-dependencies]
expect-test = "1.1.0"

View file

@ -0,0 +1,7 @@
project('lix-doc')
meson_cargo = subproject('meson-cargo')
lix_doc = custom_target('lix-doc', input: '.' / 'Cargo.toml', output : 'liblix_doc.a', build_by_default : true, kwargs : meson_cargo.get_variable('build_dict') + { 'env' : meson_cargo.get_variable('build_env') + { 'MCARGO_PROFILE' : 'release', 'MCARGO_CRATE_TYPE' : 'staticlib' }})
lix_doc_dep = declare_dependency(link_with : lix_doc)

View file

@ -5,4 +5,6 @@ rustPlatform.buildRustPackage {
cargoLock.lockFile = ./Cargo.lock;
src = lib.cleanSource ./.;
doCheck = false;
}

View file

@ -0,0 +1,14 @@
{
pkgs ? import <nixpkgs> { },
}:
let
package = pkgs.callPackage ./package.nix { };
in
pkgs.mkShell {
packages = with pkgs; [
rust-analyzer
cargo
rustPlatform.rustLibSrc
];
inputsFrom = [ package ];
}

View file

@ -0,0 +1,602 @@
// SPDX-FileCopyrightText: 2024 Jade Lovelace
// SPDX-FileCopyrightText: 2024 Lunaphied
// SPDX-License-Identifier: BSD-2-Clause OR MIT
//! library components of nix-doc
pub mod pprint;
use crate::pprint::pprint_args;
use rnix::ast::{self, Lambda};
use rnix::{NodeOrToken, SyntaxKind};
use rnix::SyntaxNode;
// Needed because rnix fucked up and didn't reexport this, oops.
use rowan::ast::AstNode;
use std::ffi::{CStr, CString};
use std::fs;
use std::os::raw::c_char;
use std::panic;
use std::ptr;
use std::{fmt::Display, str};
pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
const DOC_INDENT: usize = 0;
struct SearchResult {
/// Name of the function
identifier: String,
/// Dedented documentation comment
doc: String,
/// Parameter block for the function
param_block: String,
}
impl SearchResult {
fn format<P: Display>(&self, filename: P, line: usize) -> String {
format!(
"**Synopsis:** `{}` = {}\n\n{}\n\n# {}",
self.identifier.as_str(),
self.param_block,
indented(&self.doc, DOC_INDENT),
format!("{}:{}", filename, line).as_str(),
)
}
}
/// Converts Nix compatible line endings (Nix accepts `\r`, `\n`, *and* `\r\n` as endings), to
/// standard `\n` endings for use within Rust land.
fn convert_endings(s: &str) -> String {
let mut out = String::with_capacity(s.len());
let mut it = s.chars().peekable();
while let Some(ch) = it.next() {
if ch == '\n' || ch == '\r' {
out.push('\n');
if ch == '\r' && it.peek().map(|&c| c == '\n').unwrap_or(false) {
// Consume `\n` in `\r\n`.
it.next();
}
} else {
out.push(ch);
}
}
out
}
/// Converts the position information from Lix itself into an character index into the file itself.
/// Expects an input string that's already had it's line endings normalized.
///
/// Note that this returns a *byte* offset, not a character offset.
fn find_pos(s: &str, line: usize, col: usize) -> usize {
// Nix line positions are 1-indexed.
let mut lines = 1;
for (byte_pos, ch) in s.char_indices() {
// If we find a newline, increase the line count.
if ch == '\n' {
lines += 1;
}
// We've arrived at the correct line.
if lines == line {
// Column position is 1-indexed, and it's a *byte* offset, because Nix doesn't actually
// support UTF-8. Rust does though, so we need to convert to a proper byte index to
// match rnix.
return byte_pos + col + 1;
}
}
// If things never match that should be literally impossible.
unreachable!();
}
/// Represents a forwarded token from rnix's AST over to lix-doc.
#[derive(Debug, Clone)]
enum DocToken {
Comment(String),
Whitespace(String),
}
/// Determine if a given token string contains more than two newlines, this is used to determine when
/// we hit blank lines between comments indicating a contextually unrelated comment.
fn has_empty_line(tok: &DocToken) -> bool {
// It's either solely whitespace with two newlines inside somewhere, or it's
// contained inside a comment token and we don't want to count that as empty.
if let DocToken::Whitespace(s) = tok {
s.chars().filter(|&c| c == '\n').take(2).count() == 2
} else {
false
}
}
/// Emits a string `s` indented by `indent` spaces
fn indented(s: &str, indent: usize) -> String {
let indent_s = " ".repeat(indent);
s.split('\n')
.map(|line| indent_s.clone() + line)
.collect::<Vec<_>>()
.join("\n")
}
/// Cleans up a single line, erasing prefix single line comments but preserving indentation
// TODO: FIXME: this isn't really the best, because we'll struggle with comments that put something
// immediately after *'s at the start of each line, such as `*>` in `lib.generators.toGitINI`,
// not only that but sometimes * at the start of a line means list item in a multiline comment that doesn't
// use leading characters, and sometimes it's just a leading * to signify a line that's part of a
// doc comment, ala javadoc style.
fn cleanup_single_line(s: &str) -> &str {
let mut cmt_new_start = 0;
let mut iter = s.char_indices().peekable();
while let Some((idx, ch)) = iter.next() {
// peek at the next character, with an explicit '\n' as "next character" at end of line
let (_, _next_ch) = iter.peek().unwrap_or(&(0, '\n'));
// if we find a character, save the byte position after it as our new string start
if ch == '#' || (ch == '*') {
cmt_new_start = idx + 1;
break;
}
// if, instead, we are on a line with no starting comment characters, leave it alone as it
// will be handled by dedent later
if !ch.is_whitespace() {
break;
}
}
&s[cmt_new_start..]
}
/// Erases indents in comments. This is *almost* a normal dedent function, but it starts by looking
/// at the second line if it can.
fn dedent_comment(s: &str) -> String {
let mut whitespaces = 0;
let mut lines = s.lines();
let first = lines.next();
// scan for whitespace
for line in lines.chain(first) {
let line_whitespace = line.chars().take_while(|ch| ch.is_whitespace()).count();
if line_whitespace != line.len() {
// a non-whitespace line, perfect for taking whitespace off of
whitespaces = line_whitespace;
break;
}
}
// maybe the first considered line we found was indented further, so let's look for more lines
// that might have a shorter indent. In the case of one line, do nothing.
//for line in s.lines().skip(1) {
// let line_whitespace = line.chars().take_while(|ch| ch.is_whitespace()).count();
//
// if line_whitespace != line.len() {
// whitespaces = line_whitespace.min(whitespaces);
// }
//}
// delete up to `whitespaces` whitespace characters from each line and reconstitute the string
let mut out = String::new();
for line in s.lines() {
let content_begin = line.find(|ch: char| !ch.is_whitespace()).unwrap_or(0);
out.push_str(&line[content_begin.min(whitespaces)..]);
out.push('\n');
}
out.truncate(out.trim_end_matches('\n').len());
out
}
/// Takes a series of comment and whitespace strings and output a clean single block of text to use
/// as the output documentation comment block.
///
/// This function expects to be given the tokens in reverse order (proceeding upwards from the
/// first comment above the definitions), this allows us to properly enforce the below conditions.
/// The output from this function will be reordered and ready for display.
///
/// The two types of documentation comments we expect are:
///
/// - A single multiline comment not whitespace separated from the start.
/// - A series of back to back single line comments not separated by whitespace.
///
/// Any other combination will be filtered out.
///
/// Once an empty line is encountered, we know no more valid documentation comments remain and stop.
fn cleanup_comments<I: DoubleEndedIterator<Item = DocToken>>(tokens: &mut I) -> String {
// Keep track of when we've found a single line and multiline comment, we use this to
// only process a single multiline or back to back single lines.
let mut found_single_line = false;
// Comments that have survived our filtering phase and should be cleaned up.
let mut valid = vec![];
// Filter out comments that don't meet the characteristics of documentation comments.
for tok in tokens {
//dbg!(&tok);
if has_empty_line(&tok) {
// Take tokens until we hit whitespace containing an empty line.
break;
}
// Only care about comments from this point on.
if let DocToken::Comment(comment) = tok {
// Now determine if it's a single line comment.
let is_single_line = comment.starts_with('#');
// We've found a single line comment if we've found one before or we just found one.
found_single_line |= is_single_line;
// What we do next is only special when we hit a multiline comment.
if !is_single_line {
// If we've hit a multiline comment as our first comment, take that one alone.
if !found_single_line {
// Otherwise we've hit a multiline comment immediately and this is our
// one and only doc comment to worry about.
valid.push(comment);
}
// Otherwise we've hit a multiline comment after single line comments, in either
// case this means we're done processing comments.
break;
}
// Otherwise this is a new single line comment to push to the stack.
valid.push(comment);
}
}
//dbg!(&valid);
// Cleanup comments for user consumption.
dedent_comment(
&valid
.into_iter()
.rev()
.map(|small_comment| {
small_comment
// Trim off start of multiline comments.
.trim_start_matches("/*")
// Trim off end of multiline comments.
.trim_end_matches("*/")
// Trim off any internal whitespace that's trapped inside comments themselves.
.trim()
// Split comments by newlines to extract lines of multiline comments.
.split('\n')
// Cleanup single line comments and a few more tweaks for multiline comments.
.map(cleanup_single_line)
//.inspect(|&x| { dbg!(x); })
.collect::<Vec<_>>()
// Reconstruct the multiline comment's whitespace.
.join("\n")
})
//.inspect(|x| { dbg!(x); })
.collect::<Vec<_>>()
// We've found that when multiple back to back single line comments are used in Nixpkgs,
// they make more sense to represent as if someone inserted line breaks into the Markdown
// properly, so we join them with linebreaks that markdown will pass through.
.join("\n\n"),
)
}
/// Get the docs for a specific function
pub fn get_function_docs(filename: &str, line: usize, col: usize) -> Option<String> {
let content = fs::read(filename).ok()?;
let decoded = convert_endings(str::from_utf8(&content).ok()?);
let pos = find_pos(&decoded, line, col);
let rowan_pos = rnix::TextSize::from(pos as u32);
// The minimum length of a lambda is 4 characters and thus the range we're looking for must be
// at least 4 characters long `_: 3` being an example of a minimal length lambda.
let rowan_range = rnix::TextRange::at(rowan_pos, 4.into());
eprintln!("**pos: {pos}**\n**file: {filename}**\n**line: {line}**\n**col: {col}**\n");
//eprintln!("content:");
//let meow = || {
// let mut it = decoded.char_indices().enumerate().peekable();
// while let Some(this) = it.next() {
// let Some(next) = it.peek() else { return };
// if next.0 != next.1.0 {
// dbg!(this, next);
// break;
// }
// }
//};
//
//meow();
//let content = &decoded[pos-10..pos+100];
//let mut out_pos = pos-10;
//for line in content.lines() {
// let mut second = String::with_capacity(line.len());
// for ch in line.chars() {
// eprint!("{}", ch);
// if out_pos >= pos {
// second.push('^');
// } else {
// second.push('*');
// }
// out_pos += 1;
// }
// eprintln!();
// eprintln!("{}", second);
// // For the newline
// out_pos += 1;
//}
//
//eprintln!("content:\n{}", &decoded[pos-10..pos+100]);
// TODO: Improve error reporting?
let root = rnix::Root::parse(&decoded).ok().ok()?;
// Extract the inner expression that represents the Root node and extract the top level expression.
let expr = root.expr()?;
// There are roughly three(?) cases we have to handle in increasing order of complexity:
// 1. A straightforward definition with an attrset binding directly to a lambda after the =,
// where that lambda is provided in the same file. This means we can simply find the deepest
// AttrsetValue node that contains the provided position and then extract it's inner lambda.
// 2. The same as above, except we're provided with the lambda inside a let-in which Lix
// doesn't give us the inside of, (Lix will only handle resolving any intermediate
// expressions to get the final one that will resolve to the Lambda itself, evidently this
// does not include let-in).
// 3. The same two cases as above, except the definitions themselves are inside a separate file
// from the binding that references them, in this case Lix just gives us that final file, so
// we don't get any visability into the AST nodes where the file is imported, thus we have
// to stub in a name since we won't be able to find the binding.
// Find the deepest node or token that covers the position given by Lix.
let covering = expr.syntax().covering_element(rowan_range);
let mut lambda = None;
for ancestor in covering.ancestors() {
if ancestor.kind() == SyntaxKind::NODE_LAMBDA {
//dbg!(&ancestor);
lambda = Some(ancestor);
break;
}
}
// There is literally always a lambda or something has gone very very wrong.
let lambda = ast::Lambda::cast(lambda.unwrap()).unwrap();
// If possible, try to convert covering to an attrpath.
let mut binding = None;
for ancestor in lambda.syntax().ancestors() {
if ancestor.kind() == SyntaxKind::NODE_ATTRPATH_VALUE {
binding = Some(ancestor);
}
}
// Covering at this point is either a binding (`NODE_ATTRPATH_VALUE`) or a lambda (`NODE_LAMBDA`) if we never
// found any bindings in this file.
let identifier;
identifier = match binding.clone() {
Some(binding) => ast::AttrpathValue::cast(binding)
.unwrap()
.attrpath()
.unwrap()
.to_string(),
_ => "<unknown binding>".to_string(),
};
// Find all the comments on the binding or the lambda if we have to fall back.
let comment_node = binding.as_ref().unwrap_or(lambda.syntax());
let comment = find_comment(comment_node).unwrap_or_else(String::new);
// And display them properly for the markdown function in Lix.
Some(visit_lambda(identifier, comment, &lambda).format(filename, line))
//Some(comment)
}
fn visit_lambda(name: String, comment: String, lambda: &Lambda) -> SearchResult {
// grab the arguments
let param_block = pprint_args(lambda);
SearchResult {
identifier: name,
doc: comment,
param_block,
}
}
fn find_comment(node: &SyntaxNode) -> Option<String> {
let comments = node
.siblings_with_tokens(rowan::Direction::Prev)
//.inspect(|v| { dbg!(v); } )
// Skip ourselves and the first whitespace token which will be the first tokens given.
.skip(2)
.map_while(|element| match element {
NodeOrToken::Token(token) => {
match token.kind() {
// Map the tokens we're interested in to our internal token type.
SyntaxKind::TOKEN_COMMENT => Some(DocToken::Comment(token.text().to_owned())),
SyntaxKind::TOKEN_WHITESPACE => {
Some(DocToken::Whitespace(token.text().to_owned()))
}
// If we hit a different token type, we know we've gone past relevant comments
// and should stop.
_ => None,
}
}
// If we hit a node entry we've definitely gone past comments that would be related to
// this node and we should retreat.
_ => None,
})
//.inspect(|x| { dbg!(x); })
.collect::<Vec<_>>();
//dbg!(&comments);
Some(cleanup_comments(&mut comments.into_iter())).filter(|c| !c.is_empty())
}
/// Get the docs for a function in the given file path at the given file position and return it as
/// a C string pointer
#[no_mangle]
pub extern "C" fn nd_get_function_docs(
filename: *const c_char,
line: usize,
col: usize,
) -> *const c_char {
let fname = unsafe { CStr::from_ptr(filename) };
fname
.to_str()
.ok()
.and_then(|f| {
panic::catch_unwind(|| get_function_docs(f, line, col))
.map_err(|e| {
eprintln!("panic!! {:#?}", e);
e
})
.ok()
})
.flatten()
.and_then(|s| CString::new(s).ok())
.map(|s| s.into_raw() as *const c_char)
.unwrap_or(ptr::null())
}
/// Call this to free a string from nd_get_function_docs
#[no_mangle]
pub extern "C" fn nd_free_string(s: *const c_char) {
unsafe {
// cast note: this cast is turning something that was cast to const
// back to mut
drop(CString::from_raw(s as *mut c_char));
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_line_conversion() {
let fakefile = "abc\rdef\r\nghi";
assert_eq!(convert_endings(fakefile), "abc\ndef\nghi");
}
#[test]
fn test_bytepos() {
let fakefile = "abc\ndef\nghi";
assert_eq!(find_pos(fakefile, 2, 2), 5);
}
#[test]
fn test_bytepos_cursed() {
let fakefile = convert_endings("abc\rdef\r\nghi");
assert_eq!(find_pos(&fakefile, 2, 2), 5);
assert_eq!(find_pos(&fakefile, 3, 2), 9);
}
#[test]
fn test_comment_stripping() {
let ex1 = [DocToken::Comment(
"/* blah blah blah\n foooo baaar\n blah */".to_string(),
)];
assert_eq!(
cleanup_comments(&mut ex1.into_iter()),
"blah blah blah\n foooo baaar\nblah"
);
let ex2 = ["# a1", "# a2", "# aa"]
.into_iter()
.map(|s| DocToken::Comment(s.to_string()));
assert_eq!(cleanup_comments(&mut ex2.into_iter()), "aa\n\n a2\n\na1");
}
#[test]
fn test_dedent() {
let ex1 = "a\n b\n c\n d";
assert_eq!(dedent_comment(ex1), "a\nb\nc\n d");
let ex2 = "a\nb\nc";
assert_eq!(dedent_comment(ex2), ex2);
let ex3 = " a\n b\n\n c";
assert_eq!(dedent_comment(ex3), "a\nb\n\n c");
}
#[test]
fn test_single_line_comment_stripping() {
let ex1 = " * a";
let ex2 = " # a";
let ex3 = " a";
let ex4 = " *";
assert_eq!(cleanup_single_line(ex1), " a");
assert_eq!(cleanup_single_line(ex2), " a");
assert_eq!(cleanup_single_line(ex3), ex3);
assert_eq!(cleanup_single_line(ex4), "");
}
#[test]
fn test_single_line_retains_bold_headings() {
let ex1 = " **Foo**:";
assert_eq!(cleanup_single_line(ex1), ex1);
}
#[test]
fn test_commented_args() {
let source = r#"
# hiell
{
# A comment
a,
# B comment
b,
# Ellipsis double comment
# Line two
...
}: 5
"#;
let parsed = ast::Root::parse(source);
if let Some(ast::Expr::Lambda(lambda)) = parsed.tree().expr() {
dbg!(pprint_args(&lambda));
}
dbg!(find_comment(parsed.tree().expr().unwrap().syntax()));
assert!(false);
}
#[test]
fn test_meow() {
let testcase = r#"
rec {
/*
Hello
23
This is a comment.
this is another comment.
and this is a third comment.
Way
go
*/
meow = { g }: {a, b ? 4, ...}: g: c: 5;
# And another comment.
cat = 34;
# inner layer.
"inner-layer" = outer: meow;
}
"#;
let location = dbg!(testcase.find("meow").unwrap() as u32);
let parsed = rnix::Root::parse(testcase);
let parsed = parsed.tree().expr().unwrap().syntax().to_owned();
for thing in parsed.preorder() {
match thing {
rnix::WalkEvent::Enter(inner) => {
if inner.kind() == rnix::SyntaxKind::NODE_ATTRPATH_VALUE {
if inner.text_range().contains_inclusive(location.into()) {
dbg!(&inner);
dbg!(find_comment(&inner));
eprintln!("{}", find_comment(&inner).unwrap_or("".to_string()));
}
}
}
_ => {}
}
}
assert!(false);
}
}

View file

@ -0,0 +1,63 @@
// SPDX-FileCopyrightText: 2024 Jade Lovelace
// SPDX-FileCopyrightText: 2024 Lunaphied
// SPDX-License-Identifier: BSD-2-Clause OR MIT
use rnix::ast::{Expr, Lambda};
use rowan::ast::AstNode;
/// Pretty-prints the arguments to a function
pub fn pprint_args(lambda: &Lambda) -> String {
// TODO: handle docs directly on NODE_IDENT args (uncommon case)
let mut lambda = lambda.clone();
let mut depth = 0;
let mut out = String::new();
loop {
let arg = lambda.param().unwrap();
for child in arg.syntax().children_with_tokens() {
//dbg!(child.kind());
match child {
rowan::NodeOrToken::Node(node) => {
out.push_str(&node.text().to_string());
if node.kind() == rnix::SyntaxKind::NODE_PAT_ENTRY {
out.push_str(&",\n");
}
}
rowan::NodeOrToken::Token(token) => {
use rnix::SyntaxKind::{
TOKEN_COMMENT, TOKEN_ELLIPSIS, TOKEN_L_BRACE, TOKEN_QUESTION, TOKEN_R_BRACE,
};
match token.kind() {
TOKEN_COMMENT | TOKEN_ELLIPSIS | TOKEN_QUESTION | TOKEN_L_BRACE
| TOKEN_R_BRACE => {
//dbg!(&token);
out.push_str(&token.text().to_string());
if token.kind() == TOKEN_COMMENT {
out.push('\n');
}
}
_ => {}
}
//out.push_str(&token.text().to_string());
}
}
}
out.push_str(": ");
let body = lambda.body().unwrap();
if let Expr::Lambda(inner) = body {
lambda = inner;
// If we recurse we want the next line of recursion to be indented and on a new line.
out.push('\n');
for _ in 0..=depth {
out.push('\t');
}
depth += 1;
} else {
// If we don't find an inner lambda we're done with argument handling.
break;
}
}
out.push_str("...");
out
//pprint_arg(lambda.arg());
}

@ -0,0 +1 @@
Subproject commit 5566213be71ae204628952ebc076d1641e46f37c