lix-doc: update dependencies and refactor

This updates the version of rnix used and refactors the code generally
to be more precise and capable in its identification of both lambdas
and determining which documentation comments are attached.

Change-Id: Ib0dddabd71f772c95077f9d7654023b37a7a1fd2
This commit is contained in:
Lunaphied 2024-06-18 16:24:49 -06:00 committed by lunaphied
parent 6e0ca02425
commit 41963df4a5
6 changed files with 395 additions and 247 deletions

114
lix-doc/Cargo.lock generated
View file

@ -9,13 +9,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]] [[package]]
name = "cbitset" name = "countme"
version = "0.2.0" version = "3.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29b6ad25ae296159fb0da12b970b2fe179b234584d7cd294c891e2bbb284466b" checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636"
dependencies = [
"num-traits",
]
[[package]] [[package]]
name = "dissimilar" name = "dissimilar"
@ -33,19 +30,26 @@ dependencies = [
"once_cell", "once_cell",
] ]
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
[[package]] [[package]]
name = "lix-doc" name = "lix-doc"
version = "0.0.1" version = "0.0.1"
dependencies = [ dependencies = [
"expect-test", "expect-test",
"rnix", "rnix",
"rowan",
] ]
[[package]] [[package]]
name = "num-traits" name = "memoffset"
version = "0.2.18" version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
dependencies = [ dependencies = [
"autocfg", "autocfg",
] ]
@ -56,44 +60,26 @@ version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "proc-macro2"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]] [[package]]
name = "rnix" name = "rnix"
version = "0.8.1" version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a9b645f0edba447dbfc6473dd22999f46a1d00ab39e777a2713a1cf34a1597b" checksum = "bb35cedbeb70e0ccabef2a31bcff0aebd114f19566086300b8f42c725fc2cb5f"
dependencies = [ dependencies = [
"cbitset",
"rowan", "rowan",
] ]
[[package]] [[package]]
name = "rowan" name = "rowan"
version = "0.9.1" version = "0.15.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ea7cadf87a9d8432e85cb4eb86bd2e765ace60c24ef86e79084dcae5d1c5a19" checksum = "32a58fa8a7ccff2aec4f39cc45bf5f985cec7125ab271cf681c279fd00192b49"
dependencies = [ dependencies = [
"countme",
"hashbrown",
"memoffset",
"rustc-hash", "rustc-hash",
"smol_str", "text-size",
"text_unit",
"thin-dst",
] ]
[[package]] [[package]]
@ -103,59 +89,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]] [[package]]
name = "serde" name = "text-size"
version = "1.0.197" version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "smol_str"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fad6c857cbab2627dcf01ec85a623ca4e7dcb5691cbaa3d7fb7653671f0d09c9"
dependencies = [
"serde",
]
[[package]]
name = "syn"
version = "2.0.53"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "text_unit"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20431e104bfecc1a40872578dbc390e10290a0e9c35fffe3ce6f73c15a9dbfc2"
[[package]]
name = "thin-dst"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db3c46be180f1af9673ebb27bc1235396f61ef6965b3fe0dbb2e624deb604f0e"
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

View file

@ -1,6 +1,6 @@
[package] [package]
description = "Nix function documentation tool, stripped down into a library" description = "Nix function documentation tool, stripped down into a library"
edition = "2018" edition = "2021"
name = "lix-doc" name = "lix-doc"
version = "0.0.1" version = "0.0.1"
license = "BSD-2-Clause OR MIT" license = "BSD-2-Clause OR MIT"
@ -12,7 +12,9 @@ repository = "https://github.com/lf-/nix-doc"
crate_type = ["staticlib"] crate_type = ["staticlib"]
[dependencies] [dependencies]
rnix = "0.8.0" rnix = "0.11.0"
# Necessary because rnix fails to export a critical trait (Rowan's AstNode).
rowan = "0.15.0"
[dev-dependencies] [dev-dependencies]
expect-test = "1.1.0" expect-test = "1.1.0"

View file

@ -1,5 +1,5 @@
// SPDX-FileCopyrightText: 2024 Jade Lovelace // SPDX-FileCopyrightText: 2024 Jade Lovelace
// // SPDX-FileCopyrightText: 2024 Lunaphied
// SPDX-License-Identifier: BSD-2-Clause OR MIT // SPDX-License-Identifier: BSD-2-Clause OR MIT
//! library components of nix-doc //! library components of nix-doc
@ -7,13 +7,16 @@ pub mod pprint;
use crate::pprint::pprint_args; use crate::pprint::pprint_args;
use rnix::types::{Lambda, TypedNode}; use rnix::ast::{self, Lambda};
use rnix::SyntaxKind::*; use rnix::{NodeOrToken, SyntaxKind};
use rnix::{NodeOrToken, SyntaxNode, TextUnit, WalkEvent}; use rnix::SyntaxNode;
// Needed because rnix fucked up and didn't reexport this, oops.
use rowan::ast::AstNode;
use std::ffi::{CStr, CString}; use std::ffi::{CStr, CString};
use std::fs; use std::fs;
use std::iter;
use std::os::raw::c_char; use std::os::raw::c_char;
use std::panic; use std::panic;
@ -23,66 +26,104 @@ use std::{fmt::Display, str};
pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
const DOC_INDENT: usize = 3;
struct SearchResult { struct SearchResult {
/// Name of the function /// Name of the function
identifier: String, identifier: String,
/// Dedented documentation comments /// Dedented documentation comment
doc: String, doc: String,
/// Parameter block for the function /// Parameter block for the function
param_block: String, param_block: String,
} }
fn find_pos(file: &str, line: usize, col: usize) -> usize {
let mut lines = 1;
let mut line_start = 0;
let mut it = file.chars().enumerate().peekable();
while let Some((count, ch)) = it.next() {
if ch == '\n' || ch == '\r' {
lines += 1;
let addend = if ch == '\r' && it.peek().map(|x| x.1) == Some('\n') {
it.next();
1
} else {
0
};
line_start = count + addend;
}
let col_diff = ((count as i32) - (line_start as i32)).abs() as usize;
if lines == line && col_diff == col {
return count;
}
}
unreachable!();
}
impl SearchResult { impl SearchResult {
fn format<P: Display>(&self, filename: P, line: usize) -> String { fn format<P: Display>(&self, filename: P, line: usize) -> String {
format!( format!(
"**Synopsis:** `{}` = {}\n\n{}\n\n# {}", "**Synopsis:** `{}` = {}\n\n{}\n\n# {}",
self.identifier.as_str(), self.identifier.as_str(),
self.param_block, self.param_block,
indented(&self.doc, DOC_INDENT), self.doc,
format!("{}:{}", filename, line).as_str(), format!("{}:{}", filename, line).as_str(),
) )
} }
} }
/// Emits a string `s` indented by `indent` spaces /// Converts Nix compatible line endings (Nix accepts `\r`, `\n`, *and* `\r\n` as endings), to
fn indented(s: &str, indent: usize) -> String { /// standard `\n` endings for use within Rust land.
let indent_s = iter::repeat(' ').take(indent).collect::<String>(); fn convert_endings(s: &str) -> String {
s.split('\n') let mut out = String::with_capacity(s.len());
.map(|line| indent_s.clone() + line) let mut it = s.chars().peekable();
.collect::<Vec<_>>()
.join("\n") while let Some(ch) = it.next() {
if ch == '\n' || ch == '\r' {
out.push('\n');
if ch == '\r' && it.peek().map(|&c| c == '\n').unwrap_or(false) {
// Consume `\n` in `\r\n`.
it.next();
}
} else {
out.push(ch);
}
}
out
}
/// Converts the position information from Lix itself into an character index into the file itself.
/// Expects an input string that's already had its line endings normalized.
///
/// Note that this returns a *byte* offset, not a character offset.
fn find_pos(s: &str, line: usize, col: usize) -> usize {
// Nix line positions are 1-indexed.
let mut lines = 1;
for (byte_pos, ch) in s.char_indices() {
// If we find a newline, increase the line count.
if ch == '\n' {
lines += 1;
}
// We've arrived at the correct line.
if lines == line {
// Column position is 1-indexed, and it's a *byte* offset, because Nix doesn't actually
// support UTF-8. Rust does though, so we need to convert to a proper byte index to
// match rnix. Lix also doesn't consider the line endings part of the column offset so
// we implicitly add one to advance to the character *after* that.
return byte_pos + col;
}
}
// If things never match that should be literally impossible.
unreachable!();
}
/// Represents a forwarded token from rnix's AST over to lix-doc.
#[derive(Debug, Clone)]
enum DocToken {
Comment(String),
Whitespace(String),
}
/// Determine if a given token string contains more than two newlines, this is used to determine when
/// we hit blank lines between comments indicating a contextually unrelated comment.
fn has_empty_line(tok: &DocToken) -> bool {
// It's either solely whitespace with two newlines inside somewhere, or it's
// contained inside a comment token and we don't want to count that as empty.
if let DocToken::Whitespace(s) = tok {
s.chars().filter(|&c| c == '\n').take(2).count() == 2
} else {
false
}
} }
/// Cleans up a single line, erasing prefix single line comments but preserving indentation /// Cleans up a single line, erasing prefix single line comments but preserving indentation
fn cleanup_single_line<'a>(s: &'a str) -> &'a str { // NOTE: We have a bit of a conflict of interest problem here due to the inconsistent format of
// doc comments. Some doc comments will use a series of single line comments that may then contain `*`
// characters to represent a list. Some will be multiline comments that don't prefix individual lines
// with `*`, only using them for lists directly, and some will prefix lines with `*` as a leading
// character to mark the block. There's no way to disambiguate all three, but we do our best to
// make the common case pretty.
fn cleanup_single_line(s: &str) -> &str {
let mut cmt_new_start = 0; let mut cmt_new_start = 0;
let mut iter = s.char_indices().peekable(); let mut iter = s.char_indices().peekable();
while let Some((idx, ch)) = iter.next() { while let Some((idx, ch)) = iter.next() {
@ -90,7 +131,9 @@ fn cleanup_single_line<'a>(s: &'a str) -> &'a str {
let (_, next_ch) = iter.peek().unwrap_or(&(0, '\n')); let (_, next_ch) = iter.peek().unwrap_or(&(0, '\n'));
// if we find a character, save the byte position after it as our new string start // if we find a character, save the byte position after it as our new string start
if ch == '#' || (ch == '*' && next_ch.is_whitespace()) { // This has special handling for `>` because some Nixpkgs documentation has `*>` right
// after the start of their doc comments, and we want to strip the `*` still.
if ch == '#' || (ch == '*' && (*next_ch == '>' || next_ch.is_whitespace())) {
cmt_new_start = idx + 1; cmt_new_start = idx + 1;
break; break;
} }
@ -103,15 +146,12 @@ fn cleanup_single_line<'a>(s: &'a str) -> &'a str {
&s[cmt_new_start..] &s[cmt_new_start..]
} }
/// Erases indents in comments. This is *almost* a normal dedent function, but it starts by looking /// Erases indents in comments based on the indentation of the first line.
/// at the second line if it can.
fn dedent_comment(s: &str) -> String { fn dedent_comment(s: &str) -> String {
let mut whitespaces = 0; let mut whitespaces = 0;
let mut lines = s.lines();
let first = lines.next();
// scan for whitespace // scan for whitespace
for line in lines.chain(first) { for line in s.lines() {
let line_whitespace = line.chars().take_while(|ch| ch.is_whitespace()).count(); let line_whitespace = line.chars().take_while(|ch| ch.is_whitespace()).count();
if line_whitespace != line.len() { if line_whitespace != line.len() {
@ -121,16 +161,6 @@ fn dedent_comment(s: &str) -> String {
} }
} }
// maybe the first considered line we found was indented further, so let's look for more lines
// that might have a shorter indent. In the case of one line, do nothing.
for line in s.lines().skip(1) {
let line_whitespace = line.chars().take_while(|ch| ch.is_whitespace()).count();
if line_whitespace != line.len() {
whitespaces = line_whitespace.min(whitespaces);
}
}
// delete up to `whitespaces` whitespace characters from each line and reconstitute the string // delete up to `whitespaces` whitespace characters from each line and reconstitute the string
let mut out = String::new(); let mut out = String::new();
for line in s.lines() { for line in s.lines() {
@ -143,69 +173,163 @@ fn dedent_comment(s: &str) -> String {
out out
} }
/// Deletes whitespace and leading comment characters /// Takes a series of comment and whitespace strings and output a clean single block of text to use
/// as the output documentation comment block.
/// ///
/// Oversight we are choosing to ignore: if you put # characters at the beginning of lines in a /// This function expects to be given the tokens in reverse order (proceeding upwards from the
/// multiline comment, they will be deleted. /// first comment above the definitions), this allows us to properly enforce the below conditions.
fn cleanup_comments<S: AsRef<str>, I: DoubleEndedIterator<Item = S>>(comment: &mut I) -> String { /// The output from this function will be reordered and ready for display.
///
/// The two types of documentation comments we expect are:
///
/// - A single multiline comment not whitespace separated from the start.
/// - A series of back to back single line comments not separated by whitespace.
///
/// Any other combination will be filtered out.
///
/// Once an empty line is encountered, we know no more valid documentation comments remain and stop.
fn cleanup_comments<I: Iterator<Item = DocToken>>(tokens: &mut I) -> String {
// Keep track of when we've found a single line and multiline comment, we use this to
// only process a single multiline or back to back single lines.
let mut found_single_line = false;
// Comments that have survived our filtering phase and should be cleaned up.
let mut valid = vec![];
// Filter out comments that don't meet the characteristics of documentation comments.
for tok in tokens {
if has_empty_line(&tok) {
// Take tokens until we hit whitespace containing an empty line.
break;
}
// Only care about comments from this point on.
if let DocToken::Comment(comment) = tok {
// Now determine if it's a single line comment.
let is_single_line = comment.starts_with('#');
// We've found a single line comment if we've found one before or we just found one.
found_single_line |= is_single_line;
// What we do next is only special when we hit a multiline comment.
if !is_single_line {
// If we've hit a multiline comment as our first comment, take that one alone.
if !found_single_line {
// Otherwise we've hit a multiline comment immediately and this is our
// one and only doc comment to worry about.
valid.push(comment);
}
// Otherwise we've hit a multiline comment after single line comments, in either
// case this means we're done processing comments.
break;
}
// Otherwise this is a new single line comment to push to the stack.
valid.push(comment);
}
}
// Cleanup comments for user consumption.
dedent_comment( dedent_comment(
&comment &valid
.into_iter()
.rev() .rev()
.map(|small_comment| { .map(|small_comment| {
small_comment small_comment
.as_ref() // Trim off start of multiline comments.
// space before multiline start
.trim_start()
// multiline starts
.trim_start_matches("/*") .trim_start_matches("/*")
// trailing so we can grab multiline end // Trim off end of multiline comments.
.trim_end()
// multiline ends
.trim_end_matches("*/") .trim_end_matches("*/")
// extra space that was in the multiline // Trim off any internal whitespace that's trapped inside comments themselves.
.trim() .trim()
// Split comments by newlines to extract lines of multiline comments.
.split('\n') .split('\n')
// erase single line comments and such // Cleanup single line comments and a few more tweaks for multiline comments.
.map(cleanup_single_line) .map(cleanup_single_line)
.collect::<Vec<_>>() .collect::<Vec<_>>()
// Reconstruct the multiline comment's whitespace.
.join("\n") .join("\n")
}) })
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join("\n"), // We've found that when multiple back to back single line comments are used in Nixpkgs,
// they make more sense to represent as if someone inserted line breaks into the Markdown
// properly, so we join them with linebreaks that markdown will pass through.
.join("\n\n"),
) )
} }
/// Get the docs for a specific function /// Get the docs for a specific function.
// TODO: Improve error reporting?
pub fn get_function_docs(filename: &str, line: usize, col: usize) -> Option<String> { pub fn get_function_docs(filename: &str, line: usize, col: usize) -> Option<String> {
let content = fs::read(filename).ok()?; let content = fs::read(filename).ok()?;
let decoded = str::from_utf8(&content).ok()?; let decoded = convert_endings(str::from_utf8(&content).ok()?);
let pos = find_pos(&decoded, line, col); let pos = find_pos(&decoded, line, col);
let rowan_pos = TextUnit::from_usize(pos); let rowan_pos = rnix::TextSize::from(pos as u32);
let tree = rnix::parse(decoded);
// The minimum length of a lambda is 4 characters and thus the range we're looking for must be
// at least 4 characters long `_: 3` being an example of a minimal length lambda.
let rowan_range = rnix::TextRange::at(rowan_pos, 4.into());
// Parse the file using rnix.
let root = rnix::Root::parse(&decoded).ok().ok()?;
// Extract the inner expression that represents the Root node and extract the top level expression.
let expr = root.expr()?;
// There are two cases we have to be able to handle
// 1. A straightforward definition with an attrset binding to a lambda that's defined inline.
// 2. A lambda defined in a standalone file where the attrset binding imports that file directly.
// The latter case will not be able to find the binding so we must be able to handle not finding it.
// Find the deepest node or token that covers the position given by Lix.
let covering = expr.syntax().covering_element(rowan_range);
// Climb up until we find the lambda node that contains that token.
let mut lambda = None; let mut lambda = None;
for node in tree.node().preorder() { for ancestor in covering.ancestors() {
match node { if ancestor.kind() == SyntaxKind::NODE_LAMBDA {
WalkEvent::Enter(n) => { lambda = Some(ancestor);
if n.text_range().start() >= rowan_pos && n.kind() == NODE_LAMBDA {
lambda = Lambda::cast(n);
break; break;
} }
} }
WalkEvent::Leave(_) => (),
// There is literally always a lambda or something has gone very very wrong.
let lambda =
ast::Lambda::cast(
lambda.expect("no lambda found; what.")
) .expect("not a rnix::ast::Lambda; what.");
// Search up, hopefully to find the binding so we can get the identifier name.
// TODO: Just provide this directly from the C++ code to make it possible to always have the correct identifier.
let mut binding = None;
for ancestor in lambda.syntax().ancestors() {
if ancestor.kind() == SyntaxKind::NODE_ATTRPATH_VALUE {
binding = Some(ancestor);
} }
} }
let lambda = lambda?;
let res = visit_lambda("func".to_string(), &lambda);
Some(res.format(filename, line))
}
fn visit_lambda(name: String, lambda: &Lambda) -> SearchResult { // Convert the binding to an identifier if it was found, otherwise use a placeholder.
let identifier;
identifier = match binding.clone() {
Some(binding) => ast::AttrpathValue::cast(binding)
.expect("not an rnix::ast::AttrpathValue; what")
.attrpath()
.expect("AttrpathValue has no attrpath; what.")
.to_string(),
_ => "<unknown binding>".to_string(),
};
// Find all the comments on the binding or the lambda if we have to fall back.
let comment_node = binding.as_ref().unwrap_or(lambda.syntax());
let comment = find_comment(comment_node).unwrap_or_else(String::new);
// And display them properly for the markdown function in Lix.
Some(visit_lambda(identifier, comment, &lambda).format(filename, line))
}
fn visit_lambda(name: String, comment: String, lambda: &Lambda) -> SearchResult {
// grab the arguments // grab the arguments
let param_block = pprint_args(&lambda); let param_block = pprint_args(lambda);
// find the doc comment
let comment = find_comment(lambda.node().clone()).unwrap_or_else(|| "".to_string());
SearchResult { SearchResult {
identifier: name, identifier: name,
@ -214,39 +338,47 @@ fn visit_lambda(name: String, lambda: &Lambda) -> SearchResult {
} }
} }
fn find_comment(node: SyntaxNode) -> Option<String> { fn find_comment(node: &SyntaxNode) -> Option<String> {
let mut node = NodeOrToken::Node(node); let mut it = node
let mut comments = Vec::new(); .siblings_with_tokens(rowan::Direction::Prev)
loop { // Skip ourselves as we're always the first token returned.
loop { .skip(1)
if let Some(new) = node.prev_sibling_or_token() { .peekable();
node = new;
break; // Consume up to one whitespace token before the first comment. There might not always be
} else { // whitespace such as the (rather unusual) case of `/* meow */x = a: 3`.
node = NodeOrToken::Node(node.parent()?); if matches!(it.peek(), Some(NodeOrToken::Token(token)) if token.kind() == SyntaxKind::TOKEN_WHITESPACE) {
} it.next();
} }
match node.kind() { let comments = it.map_while(|element| match element {
TOKEN_COMMENT => match &node { NodeOrToken::Token(token) => {
NodeOrToken::Token(token) => comments.push(token.text().clone()), match token.kind() {
NodeOrToken::Node(_) => unreachable!(), // Map the tokens we're interested in to our internal token type.
}, SyntaxKind::TOKEN_COMMENT => Some(DocToken::Comment(token.text().to_owned())),
// This stuff is found as part of `the-fn = f: ...` SyntaxKind::TOKEN_WHITESPACE => {
// here: ^^^^^^^^ Some(DocToken::Whitespace(token.text().to_owned()))
NODE_KEY | TOKEN_ASSIGN => (), }
t if t.is_trivia() => (), // If we hit a different token type, we know we've gone past relevant comments
_ => break, // and should stop.
_ => None,
} }
} }
let doc = cleanup_comments(&mut comments.iter().map(|c| c.as_str())); // If we hit a node entry we've definitely gone past comments that would be related to
Some(doc).filter(|it| !it.is_empty()) // this node and we should retreat.
_ => None,
});
// For the curious, `into_iter()` here consumes the binding producing an owned value allowing us to avoid
// making the original binding mutable, we don't reuse it later so this is a cute way to handle it, though
// there's probably a better way we just can't remember.
Some(cleanup_comments(&mut comments.into_iter())).filter(|c| !c.is_empty())
} }
/// Get the docs for a function in the given file path at the given file position and return it as /// Get the docs for a function in the given file path at the given file position and return it as
/// a C string pointer /// a C string pointer
#[no_mangle] #[no_mangle]
pub extern "C" fn nd_get_function_docs( pub extern "C" fn lixdoc_get_function_docs(
filename: *const c_char, filename: *const c_char,
line: usize, line: usize,
col: usize, col: usize,
@ -269,9 +401,9 @@ pub extern "C" fn nd_get_function_docs(
.unwrap_or(ptr::null()) .unwrap_or(ptr::null())
} }
/// Call this to free a string from nd_get_function_docs /// Call this to free a string from `lixdoc_get_function_docs`.
#[no_mangle] #[no_mangle]
pub extern "C" fn nd_free_string(s: *const c_char) { pub extern "C" fn lixdoc_free_string(s: *const c_char) {
unsafe { unsafe {
// cast note: this cast is turning something that was cast to const // cast note: this cast is turning something that was cast to const
// back to mut // back to mut
@ -283,35 +415,57 @@ pub extern "C" fn nd_free_string(s: *const c_char) {
mod tests { mod tests {
use super::*; use super::*;
#[test]
fn test_line_conversion() {
let fakefile = "abc\rdef\r\nghi";
assert_eq!(convert_endings(fakefile), "abc\ndef\nghi");
}
#[test] #[test]
fn test_bytepos() { fn test_bytepos() {
let fakefile = "abc\ndef\nghi"; let fakefile = "abc\ndef\nghi";
assert_eq!(find_pos(fakefile, 2, 2), 5); assert_eq!(find_pos(fakefile, 2, 2), 5);
} }
#[test]
fn test_bytepos_unusual() {
let fakefile = convert_endings("abc\rdef\r\nghi");
assert_eq!(find_pos(&fakefile, 2, 2), 5);
assert_eq!(find_pos(&fakefile, 3, 2), 9);
}
/// This test is to check that we correctly resolve byte positions even when inconsistent with
/// character positions.
#[test] #[test]
fn test_bytepos_cursed() { fn test_bytepos_cursed() {
let fakefile = "abc\rdef\r\nghi"; let fakefile = "hello\nwórld";
assert_eq!(find_pos(fakefile, 2, 2), 5); // Try to find the position of the `r` after world, which will be wrong if we don't handle
assert_eq!(find_pos(fakefile, 3, 2), 10); // UTF-8 properly.
let pos = find_pos(&fakefile, 2, 4);
dbg!(&fakefile[pos..]);
assert_eq!(pos, 9)
} }
#[test] #[test]
fn test_comment_stripping() { fn test_comment_stripping() {
let ex1 = ["/* blah blah blah\n foooo baaar\n blah */"]; let ex1 = [DocToken::Comment(
"/* blah blah blah\n foooo baaar\n blah */".to_string(),
)];
assert_eq!( assert_eq!(
cleanup_comments(&mut ex1.iter()), cleanup_comments(&mut ex1.into_iter()),
"blah blah blah\n foooo baaar\n blah" "blah blah blah\n foooo baaar\n blah"
); );
let ex2 = ["# a1", "# a2", "# aa"]; let ex2 = ["# a1", "# a2", "# aa"]
assert_eq!(cleanup_comments(&mut ex2.iter()), "aa\n a2\na1"); .into_iter()
.map(|s| DocToken::Comment(s.to_string()));
assert_eq!(cleanup_comments(&mut ex2.into_iter()), "aa\n\n a2\n\na1");
} }
#[test] #[test]
fn test_dedent() { fn test_dedent() {
let ex1 = "a\n b\n c\n d"; let ex1 = "a\n b\n c\n d";
assert_eq!(dedent_comment(ex1), "a\nb\nc\n d"); assert_eq!(dedent_comment(ex1), ex1);
let ex2 = "a\nb\nc"; let ex2 = "a\nb\nc";
assert_eq!(dedent_comment(ex2), ex2); assert_eq!(dedent_comment(ex2), ex2);
let ex3 = " a\n b\n\n c"; let ex3 = " a\n b\n\n c";
@ -335,4 +489,31 @@ mod tests {
let ex1 = " **Foo**:"; let ex1 = " **Foo**:";
assert_eq!(cleanup_single_line(ex1), ex1); assert_eq!(cleanup_single_line(ex1), ex1);
} }
// TODO: Next CL
//#[test]
//fn comment_test_complex() {
// let testcase = r#"
// rec {
// /*
// Hello
// 23
// This is a comment.
// this is another comment.
// and this is a third comment.
// Way
// go
// */
// meow = { g }: {a, b ? 4, ...}: g: c: 5;
// # And another comment.
// cat = 34;
// # inner layer.
// "inner-layer" = outer: meow;
// }
// "#;
// // Need to find the location of the lambda, we do a quick hack.
// let location = dbg!(testcase.find("{ g }").unwrap() as u32);
//
// //get_function_docs(filename, line, col)
//}
} }

View file

@ -1,38 +1,61 @@
// SPDX-FileCopyrightText: 2024 Jade Lovelace // SPDX-FileCopyrightText: 2024 Jade Lovelace
// // SPDX-FileCopyrightText: 2024 Lunaphied
// SPDX-License-Identifier: BSD-2-Clause OR MIT // SPDX-License-Identifier: BSD-2-Clause OR MIT
use rnix::types::{Lambda, TypedNode}; use rnix::ast::{Expr, Lambda};
use rnix::SyntaxKind::*; use rowan::ast::AstNode;
/// Pretty-prints the arguments to a function /// Pretty-prints the arguments to a function
pub fn pprint_args(lambda: &Lambda) -> String { pub fn pprint_args(lambda: &Lambda) -> String {
// TODO: handle docs directly on NODE_IDENT args (uncommon case) // TODO: handle docs directly on NODE_IDENT args (uncommon case)
let mut lambda = lambda.clone(); let mut lambda = lambda.clone();
let mut depth = 0;
let mut out = String::new(); let mut out = String::new();
loop { loop {
let arg = lambda.arg().unwrap(); let arg = lambda.param().unwrap();
match arg.kind() { for child in arg.syntax().children_with_tokens() {
NODE_IDENT => { //dbg!(child.kind());
out += &format!("*{}*", &arg.to_string()); match child {
rowan::NodeOrToken::Node(node) => {
out.push_str(&node.text().to_string());
if node.kind() == rnix::SyntaxKind::NODE_PAT_ENTRY {
out.push_str(&",\n");
}
}
rowan::NodeOrToken::Token(token) => {
use rnix::SyntaxKind::{
TOKEN_COMMENT, TOKEN_ELLIPSIS, TOKEN_L_BRACE, TOKEN_QUESTION, TOKEN_R_BRACE,
};
match token.kind() {
TOKEN_COMMENT | TOKEN_ELLIPSIS | TOKEN_QUESTION | TOKEN_L_BRACE
| TOKEN_R_BRACE => {
//dbg!(&token);
out.push_str(&token.text().to_string());
if token.kind() == TOKEN_COMMENT {
out.push('\n');
}
}
_ => {}
}
//out.push_str(&token.text().to_string());
}
}
}
out.push_str(": "); out.push_str(": ");
let body = lambda.body().unwrap(); let body = lambda.body().unwrap();
if body.kind() == NODE_LAMBDA { if let Expr::Lambda(inner) = body {
lambda = Lambda::cast(body).unwrap(); lambda = inner;
// If we recurse we want the next line of recursion to be indented and on a new line.
out.push('\n');
for _ in 0..=depth {
out.push('\t');
}
depth += 1;
} else { } else {
// If we don't find an inner lambda we're done with argument handling.
break; break;
} }
} }
NODE_PATTERN => {
out += &format!("*{}*", &arg.to_string());
out.push_str(": ");
break;
}
t => {
unreachable!("unhandled arg type {:?}", t);
}
}
}
out.push_str("..."); out.push_str("...");
out out

View file

@ -474,6 +474,14 @@ stdenv.mkDerivation (finalAttrs: {
# Load-bearing order. Must come before clang-unwrapped below, but after clang_tools above. # Load-bearing order. Must come before clang-unwrapped below, but after clang_tools above.
stdenv.cc stdenv.cc
] ]
++ [
pkgs.rust-analyzer
pkgs.cargo
pkgs.rustc
pkgs.rustfmt
pkgs.rustPlatform.rustLibSrc
pkgs.rustPlatform.rustcSrc
]
++ lib.optionals stdenv.cc.isClang [ ++ lib.optionals stdenv.cc.isClang [
# Required for clang-tidy checks. # Required for clang-tidy checks.
llvmPackages.llvm llvmPackages.llvm

View file

@ -40,24 +40,24 @@
#include <gc/gc_cpp.h> #include <gc/gc_cpp.h>
#endif #endif
// XXX: These are for nix-doc features and will be removed in a future rewrite where this functionality is integrated more natively. // XXX: These are for lix-doc features and will be removed in a future rewrite where this functionality is integrated more natively.
extern "C" { extern "C" {
char const *nd_get_function_docs(char const *filename, size_t line, size_t col); char const *lixdoc_get_function_docs(char const *filename, size_t line, size_t col);
void nd_free_string(char const *str); void lixdoc_free_string(char const *str);
} }
namespace nix { namespace nix {
/** Wrapper around std::unique_ptr with a custom deleter for strings from nix-doc **/ /** Wrapper around std::unique_ptr with a custom deleter for strings from nix-doc **/
using NdString = std::unique_ptr<const char, decltype(&nd_free_string)>; using NdString = std::unique_ptr<const char, decltype(&lixdoc_free_string)>;
/** /**
* Fetch a string representing the doc comment using nix-doc and wrap it in an RAII wrapper. * Fetch a string representing the doc comment using nix-doc and wrap it in an RAII wrapper.
*/ */
NdString lambdaDocsForPos(SourcePath const path, nix::Pos const &pos) { NdString lambdaDocsForPos(SourcePath const path, nix::Pos const &pos) {
std::string const file = path.to_string(); std::string const file = path.to_string();
return NdString{nd_get_function_docs(file.c_str(), pos.line, pos.column), &nd_free_string}; return NdString{lixdoc_get_function_docs(file.c_str(), pos.line, pos.column), &lixdoc_free_string};
} }
/** /**