diff --git a/lix-doc/Cargo.lock b/lix-doc/Cargo.lock index d5028edfe..e82e138f5 100644 --- a/lix-doc/Cargo.lock +++ b/lix-doc/Cargo.lock @@ -9,13 +9,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] -name = "cbitset" -version = "0.2.0" +name = "countme" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29b6ad25ae296159fb0da12b970b2fe179b234584d7cd294c891e2bbb284466b" -dependencies = [ - "num-traits", -] +checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636" [[package]] name = "dissimilar" @@ -33,19 +30,26 @@ dependencies = [ "once_cell", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + [[package]] name = "lix-doc" version = "0.0.1" dependencies = [ "expect-test", "rnix", + "rowan", ] [[package]] -name = "num-traits" -version = "0.2.18" +name = "memoffset" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" dependencies = [ "autocfg", ] @@ -56,44 +60,26 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" -[[package]] -name = "proc-macro2" -version = "1.0.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" 
-dependencies = [ - "proc-macro2", -] - [[package]] name = "rnix" -version = "0.8.1" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a9b645f0edba447dbfc6473dd22999f46a1d00ab39e777a2713a1cf34a1597b" +checksum = "bb35cedbeb70e0ccabef2a31bcff0aebd114f19566086300b8f42c725fc2cb5f" dependencies = [ - "cbitset", "rowan", ] [[package]] name = "rowan" -version = "0.9.1" +version = "0.15.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ea7cadf87a9d8432e85cb4eb86bd2e765ace60c24ef86e79084dcae5d1c5a19" +checksum = "32a58fa8a7ccff2aec4f39cc45bf5f985cec7125ab271cf681c279fd00192b49" dependencies = [ + "countme", + "hashbrown", + "memoffset", "rustc-hash", - "smol_str", - "text_unit", - "thin-dst", + "text-size", ] [[package]] @@ -103,59 +89,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] -name = "serde" -version = "1.0.197" +name = "text-size" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.197" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "smol_str" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad6c857cbab2627dcf01ec85a623ca4e7dcb5691cbaa3d7fb7653671f0d09c9" -dependencies = [ - "serde", -] - -[[package]] -name = "syn" -version = "2.0.53" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - 
-[[package]] -name = "text_unit" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20431e104bfecc1a40872578dbc390e10290a0e9c35fffe3ce6f73c15a9dbfc2" - -[[package]] -name = "thin-dst" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3c46be180f1af9673ebb27bc1235396f61ef6965b3fe0dbb2e624deb604f0e" - -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" diff --git a/lix-doc/Cargo.toml b/lix-doc/Cargo.toml index df4eed932..3f8ded0f7 100644 --- a/lix-doc/Cargo.toml +++ b/lix-doc/Cargo.toml @@ -1,6 +1,6 @@ [package] description = "Nix function documentation tool, stripped down into a library" -edition = "2018" +edition = "2021" name = "lix-doc" version = "0.0.1" license = "BSD-2-Clause OR MIT" @@ -12,7 +12,9 @@ repository = "https://github.com/lf-/nix-doc" crate_type = ["staticlib"] [dependencies] -rnix = "0.8.0" +rnix = "0.11.0" +# Necessary because rnix fails to export a critical trait (Rowan's AstNode). +rowan = "0.15.0" [dev-dependencies] expect-test = "1.1.0" diff --git a/lix-doc/src/lib.rs b/lix-doc/src/lib.rs index 27fe5c9b5..f32b705f5 100644 --- a/lix-doc/src/lib.rs +++ b/lix-doc/src/lib.rs @@ -1,5 +1,5 @@ // SPDX-FileCopyrightText: 2024 Jade Lovelace -// +// SPDX-FileCopyrightText: 2024 Lunaphied // SPDX-License-Identifier: BSD-2-Clause OR MIT //! library components of nix-doc @@ -7,13 +7,16 @@ pub mod pprint; use crate::pprint::pprint_args; -use rnix::types::{Lambda, TypedNode}; -use rnix::SyntaxKind::*; -use rnix::{NodeOrToken, SyntaxNode, TextUnit, WalkEvent}; +use rnix::ast::{self, Lambda}; +use rnix::{NodeOrToken, SyntaxKind}; +use rnix::SyntaxNode; + + +// Needed because rnix fucked up and didn't reexport this, oops. 
+use rowan::ast::AstNode; use std::ffi::{CStr, CString}; use std::fs; -use std::iter; use std::os::raw::c_char; use std::panic; @@ -23,66 +26,104 @@ use std::{fmt::Display, str}; pub type Result = std::result::Result>; -const DOC_INDENT: usize = 3; - struct SearchResult { /// Name of the function identifier: String, - /// Dedented documentation comments + /// Dedented documentation comment doc: String, /// Parameter block for the function param_block: String, } -fn find_pos(file: &str, line: usize, col: usize) -> usize { - let mut lines = 1; - let mut line_start = 0; - let mut it = file.chars().enumerate().peekable(); - while let Some((count, ch)) = it.next() { - if ch == '\n' || ch == '\r' { - lines += 1; - let addend = if ch == '\r' && it.peek().map(|x| x.1) == Some('\n') { - it.next(); - 1 - } else { - 0 - }; - line_start = count + addend; - } - - let col_diff = ((count as i32) - (line_start as i32)).abs() as usize; - if lines == line && col_diff == col { - return count; - } - } - unreachable!(); -} - impl SearchResult { fn format(&self, filename: P, line: usize) -> String { format!( "**Synopsis:** `{}` = {}\n\n{}\n\n# {}", self.identifier.as_str(), self.param_block, - indented(&self.doc, DOC_INDENT), + self.doc, format!("{}:{}", filename, line).as_str(), ) } } -/// Emits a string `s` indented by `indent` spaces -fn indented(s: &str, indent: usize) -> String { - let indent_s = iter::repeat(' ').take(indent).collect::(); - s.split('\n') - .map(|line| indent_s.clone() + line) - .collect::>() - .join("\n") +/// Converts Nix compatible line endings (Nix accepts `\r`, `\n`, *and* `\r\n` as endings), to +/// standard `\n` endings for use within Rust land. +fn convert_endings(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + let mut it = s.chars().peekable(); + + while let Some(ch) = it.next() { + if ch == '\n' || ch == '\r' { + out.push('\n'); + if ch == '\r' && it.peek().map(|&c| c == '\n').unwrap_or(false) { + // Consume `\n` in `\r\n`. 
+ it.next(); + } + } else { + out.push(ch); + } + } + + out +} + +/// Converts the position information from Lix itself into a byte index into the file. +/// Expects an input string that's already had its line endings normalized. +/// +/// Note that this returns a *byte* offset, not a character offset. +fn find_pos(s: &str, line: usize, col: usize) -> usize { + // Nix line positions are 1-indexed. + let mut lines = 1; + for (byte_pos, ch) in s.char_indices() { + // If we find a newline, increase the line count. + if ch == '\n' { + lines += 1; + } + + // We've arrived at the correct line. + if lines == line { + // Column position is 1-indexed, and it's a *byte* offset, because Nix doesn't actually + // support UTF-8. Rust does though, so we need to convert to a proper byte index to + // match rnix. Lix also doesn't consider the line endings part of the column offset so + // we implicitly add one to advance to the character *after* that. + return byte_pos + col; + } + } + + // If things never match that should be literally impossible. + unreachable!(); +} + +/// Represents a forwarded token from rnix's AST over to lix-doc. +#[derive(Debug, Clone)] +enum DocToken { + Comment(String), + Whitespace(String), +} + +/// Determine if a given token string contains at least two newlines; this is used to determine when +/// we hit blank lines between comments indicating a contextually unrelated comment. +fn has_empty_line(tok: &DocToken) -> bool { + // It's either solely whitespace with two newlines inside somewhere, or it's + // contained inside a comment token and we don't want to count that as empty. 
+ if let DocToken::Whitespace(s) = tok { + s.chars().filter(|&c| c == '\n').take(2).count() == 2 + } else { + false + } } /// Cleans up a single line, erasing prefix single line comments but preserving indentation -fn cleanup_single_line<'a>(s: &'a str) -> &'a str { +// NOTE: We have a bit of a conflict of interest problem here due to the inconsistent format of +// doc comments. Some doc comments will use a series of single line comments that may then contain `*` +// characters to represent a list. Some will be multiline comments that don't prefix individual lines +// with `*`, only using them for lists directly, and some will prefix lines with `*` as a leading +// character to mark the block. There's no way to disambiguate all three, but we do our best to +// make the common case pretty. +fn cleanup_single_line(s: &str) -> &str { let mut cmt_new_start = 0; let mut iter = s.char_indices().peekable(); while let Some((idx, ch)) = iter.next() { @@ -90,7 +131,9 @@ fn cleanup_single_line<'a>(s: &'a str) -> &'a str { let (_, next_ch) = iter.peek().unwrap_or(&(0, '\n')); // if we find a character, save the byte position after it as our new string start - if ch == '#' || (ch == '*' && next_ch.is_whitespace()) { + // This has special handling for `>` because some Nixpkgs documentation has `*>` right + // after the start of their doc comments, and we want to strip the `*` still. + if ch == '#' || (ch == '*' && (*next_ch == '>' || next_ch.is_whitespace())) { cmt_new_start = idx + 1; break; } @@ -103,15 +146,12 @@ fn cleanup_single_line<'a>(s: &'a str) -> &'a str { &s[cmt_new_start..] } -/// Erases indents in comments. This is *almost* a normal dedent function, but it starts by looking -/// at the second line if it can. +/// Erases indents in comments based on the indentation of the first line. 
fn dedent_comment(s: &str) -> String { let mut whitespaces = 0; - let mut lines = s.lines(); - let first = lines.next(); // scan for whitespace - for line in lines.chain(first) { + for line in s.lines() { let line_whitespace = line.chars().take_while(|ch| ch.is_whitespace()).count(); if line_whitespace != line.len() { @@ -121,16 +161,6 @@ fn dedent_comment(s: &str) -> String { } } - // maybe the first considered line we found was indented further, so let's look for more lines - // that might have a shorter indent. In the case of one line, do nothing. - for line in s.lines().skip(1) { - let line_whitespace = line.chars().take_while(|ch| ch.is_whitespace()).count(); - - if line_whitespace != line.len() { - whitespaces = line_whitespace.min(whitespaces); - } - } - // delete up to `whitespaces` whitespace characters from each line and reconstitute the string let mut out = String::new(); for line in s.lines() { @@ -143,69 +173,163 @@ fn dedent_comment(s: &str) -> String { out } -/// Deletes whitespace and leading comment characters +/// Takes a series of comment and whitespace strings and output a clean single block of text to use +/// as the output documentation comment block. /// -/// Oversight we are choosing to ignore: if you put # characters at the beginning of lines in a -/// multiline comment, they will be deleted. -fn cleanup_comments, I: DoubleEndedIterator>(comment: &mut I) -> String { +/// This function expects to be given the tokens in reverse order (proceeding upwards from the +/// first comment above the definitions), this allows us to properly enforce the below conditions. +/// The output from this function will be reordered and ready for display. +/// +/// The two types of documentation comments we expect are: +/// +/// - A single multiline comment not whitespace separated from the start. +/// - A series of back to back single line comments not separated by whitespace. +/// +/// Any other combination will be filtered out. 
+/// +/// Once an empty line is encountered, we know no more valid documentation comments remain and stop. +fn cleanup_comments>(tokens: &mut I) -> String { + // Keep track of when we've found a single line and multiline comment, we use this to + // only process a single multiline or back to back single lines. + let mut found_single_line = false; + + // Comments that have survived our filtering phase and should be cleaned up. + let mut valid = vec![]; + + // Filter out comments that don't meet the characteristics of documentation comments. + for tok in tokens { + if has_empty_line(&tok) { + // Take tokens until we hit whitespace containing an empty line. + break; + } + + // Only care about comments from this point on. + if let DocToken::Comment(comment) = tok { + // Now determine if it's a single line comment. + let is_single_line = comment.starts_with('#'); + + // We've found a single line comment if we've found one before or we just found one. + found_single_line |= is_single_line; + + // What we do next is only special when we hit a multiline comment. + if !is_single_line { + // If we've hit a multiline comment as our first comment, take that one alone. + if !found_single_line { + // Otherwise we've hit a multiline comment immediately and this is our + // one and only doc comment to worry about. + valid.push(comment); + } + // Otherwise we've hit a multiline comment after single line comments, in either + // case this means we're done processing comments. + break; + } + + // Otherwise this is a new single line comment to push to the stack. + valid.push(comment); + } + } + + // Cleanup comments for user consumption. dedent_comment( - &comment + &valid + .into_iter() .rev() .map(|small_comment| { small_comment - .as_ref() - // space before multiline start - .trim_start() - // multiline starts + // Trim off start of multiline comments. 
.trim_start_matches("/*") - // trailing so we can grab multiline end - .trim_end() - // multiline ends + // Trim off end of multiline comments. .trim_end_matches("*/") - // extra space that was in the multiline + // Trim off any internal whitespace that's trapped inside comments themselves. .trim() + // Split comments by newlines to extract lines of multiline comments. .split('\n') - // erase single line comments and such + // Cleanup single line comments and a few more tweaks for multiline comments. .map(cleanup_single_line) .collect::>() + // Reconstruct the multiline comment's whitespace. .join("\n") }) .collect::>() - .join("\n"), + // We've found that when multiple back to back single line comments are used in Nixpkgs, + // they make more sense to represent as if someone inserted line breaks into the Markdown + // properly, so we join them with linebreaks that markdown will pass through. + .join("\n\n"), ) } -/// Get the docs for a specific function +/// Get the docs for a specific function. +// TODO: Improve error reporting? pub fn get_function_docs(filename: &str, line: usize, col: usize) -> Option { let content = fs::read(filename).ok()?; - let decoded = str::from_utf8(&content).ok()?; + let decoded = convert_endings(str::from_utf8(&content).ok()?); let pos = find_pos(&decoded, line, col); - let rowan_pos = TextUnit::from_usize(pos); - let tree = rnix::parse(decoded); + let rowan_pos = rnix::TextSize::from(pos as u32); + // The minimum length of a lambda is 4 characters and thus the range we're looking for must be + // at least 4 characters long `_: 3` being an example of a minimal length lambda. + let rowan_range = rnix::TextRange::at(rowan_pos, 4.into()); + + // Parse the file using rnix. + let root = rnix::Root::parse(&decoded).ok().ok()?; + + // Extract the inner expression that represents the Root node and extract the top level expression. + let expr = root.expr()?; + + // There are two cases we have to be able to handle + // 1. 
A straightforward definition with an attrset binding to a lambda that's defined inline. + // 2. A lambda defined in a standalone file where the attrset binding imports that file directly. + // The latter case will not be able to find the binding so we must be able to handle not finding it. + + // Find the deepest node or token that covers the position given by Lix. + let covering = expr.syntax().covering_element(rowan_range); + + // Climb up until we find the lambda node that contains that token. let mut lambda = None; - for node in tree.node().preorder() { - match node { - WalkEvent::Enter(n) => { - if n.text_range().start() >= rowan_pos && n.kind() == NODE_LAMBDA { - lambda = Lambda::cast(n); - break; - } - } - WalkEvent::Leave(_) => (), + for ancestor in covering.ancestors() { + if ancestor.kind() == SyntaxKind::NODE_LAMBDA { + lambda = Some(ancestor); + break; } } - let lambda = lambda?; - let res = visit_lambda("func".to_string(), &lambda); - Some(res.format(filename, line)) + + // There is literally always a lambda or something has gone very very wrong. + let lambda = + ast::Lambda::cast( + lambda.expect("no lambda found; what.") + ) .expect("not a rnix::ast::Lambda; what."); + + // Search up, hopefully to find the binding so we can get the identifier name. + // TODO: Just provide this directly from the C++ code to make it possible to always have the correct identifier. + let mut binding = None; + for ancestor in lambda.syntax().ancestors() { + if ancestor.kind() == SyntaxKind::NODE_ATTRPATH_VALUE { + binding = Some(ancestor); + } + } + + // Convert the binding to an identifier if it was found, otherwise use a placeholder. 
+ let identifier; + identifier = match binding.clone() { + Some(binding) => ast::AttrpathValue::cast(binding) + .expect("not an rnix::ast::AttrpathValue; what") + .attrpath() + .expect("AttrpathValue has no attrpath; what.") + .to_string(), + _ => "".to_string(), + }; + + // Find all the comments on the binding or the lambda if we have to fall back. + let comment_node = binding.as_ref().unwrap_or(lambda.syntax()); + let comment = find_comment(comment_node).unwrap_or_else(String::new); + + // And display them properly for the markdown function in Lix. + Some(visit_lambda(identifier, comment, &lambda).format(filename, line)) } -fn visit_lambda(name: String, lambda: &Lambda) -> SearchResult { +fn visit_lambda(name: String, comment: String, lambda: &Lambda) -> SearchResult { // grab the arguments - let param_block = pprint_args(&lambda); - - // find the doc comment - let comment = find_comment(lambda.node().clone()).unwrap_or_else(|| "".to_string()); + let param_block = pprint_args(lambda); SearchResult { identifier: name, @@ -214,39 +338,47 @@ fn visit_lambda(name: String, lambda: &Lambda) -> SearchResult { } } -fn find_comment(node: SyntaxNode) -> Option { - let mut node = NodeOrToken::Node(node); - let mut comments = Vec::new(); - loop { - loop { - if let Some(new) = node.prev_sibling_or_token() { - node = new; - break; - } else { - node = NodeOrToken::Node(node.parent()?); - } - } +fn find_comment(node: &SyntaxNode) -> Option { + let mut it = node + .siblings_with_tokens(rowan::Direction::Prev) + // Skip ourselves as we're always the first token returned. + .skip(1) + .peekable(); - match node.kind() { - TOKEN_COMMENT => match &node { - NodeOrToken::Token(token) => comments.push(token.text().clone()), - NodeOrToken::Node(_) => unreachable!(), - }, - // This stuff is found as part of `the-fn = f: ...` - // here: ^^^^^^^^ - NODE_KEY | TOKEN_ASSIGN => (), - t if t.is_trivia() => (), - _ => break, - } + // Consume up to one whitespace token before the first comment. 
There might not always be + // whitespace such as the (rather unusual) case of `/* meow */x = a: 3`. + if matches!(it.peek(), Some(NodeOrToken::Token(token)) if token.kind() == SyntaxKind::TOKEN_WHITESPACE) { + it.next(); } - let doc = cleanup_comments(&mut comments.iter().map(|c| c.as_str())); - Some(doc).filter(|it| !it.is_empty()) + + let comments = it.map_while(|element| match element { + NodeOrToken::Token(token) => { + match token.kind() { + // Map the tokens we're interested in to our internal token type. + SyntaxKind::TOKEN_COMMENT => Some(DocToken::Comment(token.text().to_owned())), + SyntaxKind::TOKEN_WHITESPACE => { + Some(DocToken::Whitespace(token.text().to_owned())) + } + // If we hit a different token type, we know we've gone past relevant comments + // and should stop. + _ => None, + } + } + // If we hit a node entry we've definitely gone past comments that would be related to + // this node and we should retreat. + _ => None, + }); + + // For the curious, `into_iter()` here consumes the binding producing an owned value allowing us to avoid + // making the original binding mutable, we don't reuse it later so this is a cute way to handle it, though + // there's probably a better way we just can't remember. + Some(cleanup_comments(&mut comments.into_iter())).filter(|c| !c.is_empty()) } /// Get the docs for a function in the given file path at the given file position and return it as /// a C string pointer #[no_mangle] -pub extern "C" fn nd_get_function_docs( +pub extern "C" fn lixdoc_get_function_docs( filename: *const c_char, line: usize, col: usize, @@ -269,9 +401,9 @@ pub extern "C" fn nd_get_function_docs( .unwrap_or(ptr::null()) } -/// Call this to free a string from nd_get_function_docs +/// Call this to free a string from `lixdoc_get_function_docs`. 
#[no_mangle] -pub extern "C" fn nd_free_string(s: *const c_char) { +pub extern "C" fn lixdoc_free_string(s: *const c_char) { unsafe { // cast note: this cast is turning something that was cast to const // back to mut @@ -283,35 +415,57 @@ pub extern "C" fn nd_free_string(s: *const c_char) { mod tests { use super::*; + #[test] + fn test_line_conversion() { + let fakefile = "abc\rdef\r\nghi"; + assert_eq!(convert_endings(fakefile), "abc\ndef\nghi"); + } + #[test] fn test_bytepos() { let fakefile = "abc\ndef\nghi"; assert_eq!(find_pos(fakefile, 2, 2), 5); } + #[test] + fn test_bytepos_unusual() { + let fakefile = convert_endings("abc\rdef\r\nghi"); + assert_eq!(find_pos(&fakefile, 2, 2), 5); + assert_eq!(find_pos(&fakefile, 3, 2), 9); + } + + /// This test is to check that we correctly resolve byte positions even when inconsistent with + /// character positions. #[test] fn test_bytepos_cursed() { - let fakefile = "abc\rdef\r\nghi"; - assert_eq!(find_pos(fakefile, 2, 2), 5); - assert_eq!(find_pos(fakefile, 3, 2), 10); + let fakefile = "hello\nwórld"; + // Try to find the position of the `r` after world, which will be wrong if we don't handle + // UTF-8 properly. 
+ let pos = find_pos(&fakefile, 2, 4); + dbg!(&fakefile[pos..]); + assert_eq!(pos, 9) } #[test] fn test_comment_stripping() { - let ex1 = ["/* blah blah blah\n foooo baaar\n blah */"]; + let ex1 = [DocToken::Comment( + "/* blah blah blah\n foooo baaar\n blah */".to_string(), + )]; assert_eq!( - cleanup_comments(&mut ex1.iter()), - "blah blah blah\n foooo baaar\nblah" + cleanup_comments(&mut ex1.into_iter()), + "blah blah blah\n foooo baaar\n blah" ); - let ex2 = ["# a1", "# a2", "# aa"]; - assert_eq!(cleanup_comments(&mut ex2.iter()), "aa\n a2\na1"); + let ex2 = ["# a1", "# a2", "# aa"] + .into_iter() + .map(|s| DocToken::Comment(s.to_string())); + assert_eq!(cleanup_comments(&mut ex2.into_iter()), "aa\n\n a2\n\na1"); } #[test] fn test_dedent() { let ex1 = "a\n b\n c\n d"; - assert_eq!(dedent_comment(ex1), "a\nb\nc\n d"); + assert_eq!(dedent_comment(ex1), ex1); let ex2 = "a\nb\nc"; assert_eq!(dedent_comment(ex2), ex2); let ex3 = " a\n b\n\n c"; @@ -335,4 +489,31 @@ mod tests { let ex1 = " **Foo**:"; assert_eq!(cleanup_single_line(ex1), ex1); } + + // TODO: Next CL + //#[test] + //fn comment_test_complex() { + // let testcase = r#" + // rec { + // /* + // Hello + // 23 + // This is a comment. + // this is another comment. + // and this is a third comment. + // Way + // go + // */ + // meow = { g }: {a, b ? 4, ...}: g: c: 5; + // # And another comment. + // cat = 34; + // # inner layer. + // "inner-layer" = outer: meow; + // } + // "#; + // // Need to find the location of the lambda, we do a quick hack. 
+ // let location = dbg!(testcase.find("{ g }").unwrap() as u32); + // + // //get_function_docs(filename, line, col) + //} } diff --git a/lix-doc/src/pprint.rs b/lix-doc/src/pprint.rs index 7e73d2d20..2a72c4069 100644 --- a/lix-doc/src/pprint.rs +++ b/lix-doc/src/pprint.rs @@ -1,36 +1,59 @@ // SPDX-FileCopyrightText: 2024 Jade Lovelace -// +// SPDX-FileCopyrightText: 2024 Lunaphied // SPDX-License-Identifier: BSD-2-Clause OR MIT -use rnix::types::{Lambda, TypedNode}; -use rnix::SyntaxKind::*; +use rnix::ast::{Expr, Lambda}; +use rowan::ast::AstNode; /// Pretty-prints the arguments to a function pub fn pprint_args(lambda: &Lambda) -> String { // TODO: handle docs directly on NODE_IDENT args (uncommon case) let mut lambda = lambda.clone(); + let mut depth = 0; let mut out = String::new(); loop { - let arg = lambda.arg().unwrap(); - match arg.kind() { - NODE_IDENT => { - out += &format!("*{}*", &arg.to_string()); - out.push_str(": "); - let body = lambda.body().unwrap(); - if body.kind() == NODE_LAMBDA { - lambda = Lambda::cast(body).unwrap(); - } else { - break; + let arg = lambda.param().unwrap(); + for child in arg.syntax().children_with_tokens() { + //dbg!(child.kind()); + match child { + rowan::NodeOrToken::Node(node) => { + out.push_str(&node.text().to_string()); + if node.kind() == rnix::SyntaxKind::NODE_PAT_ENTRY { + out.push_str(&",\n"); + } + } + rowan::NodeOrToken::Token(token) => { + use rnix::SyntaxKind::{ + TOKEN_COMMENT, TOKEN_ELLIPSIS, TOKEN_L_BRACE, TOKEN_QUESTION, TOKEN_R_BRACE, + }; + match token.kind() { + TOKEN_COMMENT | TOKEN_ELLIPSIS | TOKEN_QUESTION | TOKEN_L_BRACE + | TOKEN_R_BRACE => { + //dbg!(&token); + out.push_str(&token.text().to_string()); + if token.kind() == TOKEN_COMMENT { + out.push('\n'); + } + } + _ => {} + } + //out.push_str(&token.text().to_string()); } } - NODE_PATTERN => { - out += &format!("*{}*", &arg.to_string()); - out.push_str(": "); - break; - } - t => { - unreachable!("unhandled arg type {:?}", t); + } + out.push_str(": 
"); + let body = lambda.body().unwrap(); + if let Expr::Lambda(inner) = body { + lambda = inner; + // If we recurse we want the next line of recursion to be indented and on a new line. + out.push('\n'); + for _ in 0..=depth { + out.push('\t'); } + depth += 1; + } else { + // If we don't find an inner lambda we're done with argument handling. + break; } } out.push_str("..."); diff --git a/package.nix b/package.nix index be3bcfb35..6e330bfc1 100644 --- a/package.nix +++ b/package.nix @@ -419,6 +419,14 @@ stdenv.mkDerivation (finalAttrs: { # Load-bearing order. Must come before clang-unwrapped below, but after clang_tools above. stdenv.cc ] + ++ [ + pkgs.rust-analyzer + pkgs.cargo + pkgs.rustc + pkgs.rustfmt + pkgs.rustPlatform.rustLibSrc + pkgs.rustPlatform.rustcSrc + ] ++ lib.optionals stdenv.cc.isClang [ # Required for clang-tidy checks. llvmPackages.llvm diff --git a/src/libcmd/repl.cc b/src/libcmd/repl.cc index 46b6d57ed..86435117b 100644 --- a/src/libcmd/repl.cc +++ b/src/libcmd/repl.cc @@ -37,24 +37,24 @@ #include #endif -// XXX: These are for nix-doc features and will be removed in a future rewrite where this functionality is integrated more natively. +// XXX: These are for lix-doc features and will be removed in a future rewrite where this functionality is integrated more natively. extern "C" { - char const *nd_get_function_docs(char const *filename, size_t line, size_t col); - void nd_free_string(char const *str); + char const *lixdoc_get_function_docs(char const *filename, size_t line, size_t col); + void lixdoc_free_string(char const *str); } namespace nix { /** Wrapper around std::unique_ptr with a custom deleter for strings from nix-doc **/ -using NdString = std::unique_ptr; +using NdString = std::unique_ptr; /** * Fetch a string representing the doc comment using nix-doc and wrap it in an RAII wrapper. 
*/ NdString lambdaDocsForPos(SourcePath const path, nix::Pos const &pos) { std::string const file = path.to_string(); - return NdString{nd_get_function_docs(file.c_str(), pos.line, pos.column), &nd_free_string}; + return NdString{lixdoc_get_function_docs(file.c_str(), pos.line, pos.column), &lixdoc_free_string}; } /**