// SPDX-FileCopyrightText: 2024 Jade Lovelace
// SPDX-FileCopyrightText: 2024 Lunaphied
// SPDX-License-Identifier: BSD-2-Clause OR MIT

//! library components of nix-doc
pub mod pprint;

use crate::pprint::pprint_args;

use rnix::ast::{self, Lambda};
use rnix::{NodeOrToken, SyntaxKind};
use rnix::SyntaxNode;

// Needed because rnix fucked up and didn't reexport this, oops.
use rowan::ast::AstNode;

use std::ffi::{CStr, CString};
use std::fs;
use std::os::raw::c_char;
use std::panic;
use std::ptr;
use std::{fmt::Display, str};

pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;

struct SearchResult {
    /// Name of the function
    identifier: String,
    /// Dedented documentation comment
    doc: String,
    /// Parameter block for the function
    param_block: String,
}

impl SearchResult {
    fn format<P: Display>(&self, filename: P, line: usize) -> String {
        format!(
            "**Synopsis:** `{}` = {}\n\n{}\n\n# {}",
            self.identifier.as_str(),
            self.param_block,
            self.doc,
            format!("{}:{}", filename, line).as_str(),
        )
    }
}

/// Converts Nix-compatible line endings (Nix accepts `\r`, `\n`, *and* `\r\n` as endings)
/// to standard `\n` endings for use within Rust land.
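///
/// For example (mirroring the test below), `"abc\rdef\r\nghi"` becomes `"abc\ndef\nghi"`.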
fn convert_endings(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut it = s.chars().peekable();

    while let Some(ch) = it.next() {
        if ch == '\n' || ch == '\r' {
            out.push('\n');
            if ch == '\r' && it.peek().map(|&c| c == '\n').unwrap_or(false) {
                // Consume the `\n` in `\r\n`.
                it.next();
            }
        } else {
            out.push(ch);
        }
    }

    out
}

/// Converts the position information from Lix itself into an index into the file contents.
/// Expects an input string that has already had its line endings normalized.
///
/// Note that this returns a *byte* offset, not a character offset.
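///
/// For example (mirroring the tests below), line 2 column 2 of `"abc\ndef\nghi"` maps to
/// byte offset 5, the `e`.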
fn find_pos(s: &str, line: usize, col: usize) -> usize {
    // Nix line positions are 1-indexed.
    let mut lines = 1;
    for (byte_pos, ch) in s.char_indices() {
        // If we find a newline, increase the line count.
        if ch == '\n' {
            lines += 1;
        }

        // We've arrived at the correct line.
        if lines == line {
            // The column position is 1-indexed, and it's a *byte* offset, because Nix
            // doesn't actually support UTF-8; Rust does, so this already matches the byte
            // indices rnix uses. Lix also doesn't count the line ending as part of the
            // column offset, so adding `col` to the newline's byte position implicitly
            // advances one character past it.
            return byte_pos + col;
        }
    }

    // Never matching here should be literally impossible: the position always lies within
    // the file.
    unreachable!();
}

/// Represents a forwarded token from rnix's AST over to lix-doc.
#[derive(Debug, Clone)]
enum DocToken {
    Comment(String),
    Whitespace(String),
}

/// Determine whether a given token contains at least two newlines; this is used to detect
/// the blank lines between comments that indicate a contextually unrelated comment.
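///
/// For example, `DocToken::Whitespace("\n  \n".to_string())` contains an empty line, while
/// a comment token never counts, whatever its contents.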
fn has_empty_line(tok: &DocToken) -> bool {
    // It's either solely whitespace with two newlines somewhere inside, or it's contained
    // inside a comment token, which we don't want to count as empty.
    if let DocToken::Whitespace(s) = tok {
        s.chars().filter(|&c| c == '\n').take(2).count() == 2
    } else {
        false
    }
}

/// Cleans up a single line, erasing prefix single line comments but preserving indentation.
// NOTE: We have a bit of an ambiguity problem here due to the inconsistent format of doc
// comments. Some doc comments are a series of single line comments that may then contain `*`
// characters to represent a list. Some are multiline comments that don't prefix individual
// lines with `*`, only using them for lists directly, and some prefix every line with `*` as
// a leading character to mark the block. There's no way to disambiguate all three, but we do
// our best to make the common case pretty.
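// For example (mirroring the tests below), `cleanup_single_line(" # a")` yields `" a"`,
// while a plain `" a"` line is left untouched for `dedent_comment` to handle later.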
fn cleanup_single_line(s: &str) -> &str {
    let mut cmt_new_start = 0;
    let mut iter = s.char_indices().peekable();
    while let Some((idx, ch)) = iter.next() {
        // Peek at the next character, with an explicit '\n' as the "next character" at the
        // end of the line.
        let (_, next_ch) = iter.peek().unwrap_or(&(0, '\n'));

        // If we find a comment marker, save the byte position after it as our new string
        // start. This has special handling for `>` because some Nixpkgs documentation has
        // `*>` right after the start of their doc comments, and we want to strip the `*`
        // still.
        if ch == '#' || (ch == '*' && (*next_ch == '>' || next_ch.is_whitespace())) {
            cmt_new_start = idx + 1;
            break;
        }
        // If, instead, we are on a line with no starting comment characters, leave it
        // alone; it will be handled by dedent later.
        if !ch.is_whitespace() {
            break;
        }
    }
    &s[cmt_new_start..]
}

/// Erases indents in comments, based on the indentation of the first line that has any
/// non-whitespace content.
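///
/// For example, `"  a\n    b"` dedents to `"a\n  b"`: two columns of indentation are
/// removed from every line.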
fn dedent_comment(s: &str) -> String {
    let mut whitespaces = 0;

    // Scan for whitespace.
    for line in s.lines() {
        let line_whitespace = line.chars().take_while(|ch| ch.is_whitespace()).count();

        if line_whitespace != line.len() {
            // A non-whitespace line, perfect for taking whitespace off of.
            whitespaces = line_whitespace;
            break;
        }
    }

    // Delete up to `whitespaces` whitespace characters from each line and reconstitute the
    // string.
    let mut out = String::new();
    for line in s.lines() {
        let content_begin = line.find(|ch: char| !ch.is_whitespace()).unwrap_or(0);
        out.push_str(&line[content_begin.min(whitespaces)..]);
        out.push('\n');
    }

    out.truncate(out.trim_end_matches('\n').len());
    out
}

/// Takes a series of comment and whitespace strings and outputs a clean single block of
/// text to use as the output documentation comment block.
///
/// This function expects to be given the tokens in reverse order (proceeding upwards from
/// the first comment above the definition); this allows us to properly enforce the below
/// conditions. The output from this function will be reordered and ready for display.
///
/// The two types of documentation comments we expect are:
///
/// - A single multiline comment, not separated from the definition by a blank line.
/// - A series of back-to-back single line comments, not separated by blank lines.
///
/// Any other combination will be filtered out.
///
/// Once an empty line is encountered, we know no more valid documentation comments remain
/// and stop.
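///
/// For example (tokens in reverse source order), `# a2` then `# a1` comes back out as
/// `"a1\n\na2"`.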
fn cleanup_comments<I: Iterator<Item = DocToken>>(tokens: &mut I) -> String {
    // Keep track of when we've found a single line comment; we use this to only process
    // either one multiline comment or a run of back to back single line comments.
    let mut found_single_line = false;

    // Comments that have survived our filtering phase and should be cleaned up.
    let mut valid = vec![];

    // Filter out comments that don't meet the characteristics of documentation comments.
    for tok in tokens {
        if has_empty_line(&tok) {
            // Take tokens until we hit whitespace containing an empty line.
            break;
        }

        // Only care about comments from this point on.
        if let DocToken::Comment(comment) = tok {
            // Now determine if it's a single line comment.
            let is_single_line = comment.starts_with('#');

            // We've found a single line comment if we've found one before or we just found
            // one.
            found_single_line |= is_single_line;

            // What we do next is only special when we hit a multiline comment.
            if !is_single_line {
                // If we've hit a multiline comment as our first comment, it is our one and
                // only doc comment to worry about, so take it alone.
                if !found_single_line {
                    valid.push(comment);
                }
                // Whether it came first or after single line comments, hitting a multiline
                // comment means we're done processing comments.
                break;
            }

            // Otherwise this is a new single line comment to push to the stack.
            valid.push(comment);
        }
    }

    // Clean up the comments for user consumption.
    dedent_comment(
        &valid
            .into_iter()
            .rev()
            .map(|small_comment| {
                small_comment
                    // Trim off the start of multiline comments.
                    .trim_start_matches("/*")
                    // Trim off the end of multiline comments.
                    .trim_end_matches("*/")
                    // Trim off any internal whitespace that's trapped inside comments
                    // themselves.
                    .trim()
                    // Split comments by newlines to extract the lines of multiline comments.
                    .split('\n')
                    // Clean up single line comments, with a few more tweaks for multiline
                    // comments.
                    .map(cleanup_single_line)
                    .collect::<Vec<_>>()
                    // Reconstruct the multiline comment's whitespace.
                    .join("\n")
            })
            .collect::<Vec<_>>()
            // We've found that when multiple back to back single line comments are used in
            // Nixpkgs, they make more sense to represent as if someone had inserted proper
            // line breaks into the Markdown, so we join them with line breaks that Markdown
            // will pass through.
            .join("\n\n"),
    )
}

/// Get the docs for a specific function.
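///
/// The `line` and `col` arguments are 1-indexed, with `col` being a byte-based column, as
/// reported by Lix itself (see `find_pos`).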
// TODO: Improve error reporting?
pub fn get_function_docs(filename: &str, line: usize, col: usize) -> Option<String> {
    let content = fs::read(filename).ok()?;
    let decoded = convert_endings(str::from_utf8(&content).ok()?);
    let pos = find_pos(&decoded, line, col);
    let rowan_pos = rnix::TextSize::from(pos as u32);

    // The minimum length of a lambda is 4 characters, and thus the range we're looking for
    // must be at least 4 characters long; `_: 3` is an example of a minimal length lambda.
    let rowan_range = rnix::TextRange::at(rowan_pos, 4.into());

    // Parse the file using rnix.
    let root = rnix::Root::parse(&decoded).ok().ok()?;

    // Extract the inner expression that represents the Root node, i.e. the top level
    // expression.
    let expr = root.expr()?;

    // There are two cases we have to be able to handle:
    // 1. A straightforward definition with an attrset binding to a lambda that's defined
    //    inline.
    // 2. A lambda defined in a standalone file where the attrset binding imports that file
    //    directly. In this case we will not be able to find the binding, so we must handle
    //    not finding it.

    // Find the deepest node or token that covers the position given by Lix.
    let covering = expr.syntax().covering_element(rowan_range);

    // Climb up until we find the lambda node that contains that token.
    let mut lambda = None;
    for ancestor in covering.ancestors() {
        if ancestor.kind() == SyntaxKind::NODE_LAMBDA {
            lambda = Some(ancestor);
            break;
        }
    }

    // There is literally always a lambda, or something has gone very very wrong.
    let lambda = ast::Lambda::cast(lambda.expect("no lambda found; what."))
        .expect("not a rnix::ast::Lambda; what.");

    // Search up, hopefully to find the binding so we can get the identifier name.
    // TODO: Just provide this directly from the C++ code to make it possible to always
    // have the correct identifier.
    let mut binding = None;
    for ancestor in lambda.syntax().ancestors() {
        if ancestor.kind() == SyntaxKind::NODE_ATTRPATH_VALUE {
            binding = Some(ancestor);
        }
    }

    // Convert the binding to an identifier if it was found, otherwise use a placeholder.
    let identifier = match binding.clone() {
        Some(binding) => ast::AttrpathValue::cast(binding)
            .expect("not an rnix::ast::AttrpathValue; what")
            .attrpath()
            .expect("AttrpathValue has no attrpath; what.")
            .to_string(),
        _ => "<unknown binding>".to_string(),
    };

    // Find all the comments on the binding, or on the lambda if we have to fall back.
    let comment_node = binding.as_ref().unwrap_or(lambda.syntax());
    let comment = find_comment(comment_node).unwrap_or_else(String::new);

    // And display them properly for the markdown function in Lix.
    Some(visit_lambda(identifier, comment, &lambda).format(filename, line))
}

fn visit_lambda(name: String, comment: String, lambda: &Lambda) -> SearchResult {
    // Grab the arguments.
    let param_block = pprint_args(lambda);

    SearchResult {
        identifier: name,
        doc: comment,
        param_block,
    }
}

fn find_comment(node: &SyntaxNode) -> Option<String> {
    let mut it = node
        .siblings_with_tokens(rowan::Direction::Prev)
        // Skip ourselves, as we're always the first token returned.
        .skip(1)
        .peekable();

    // Consume up to one whitespace token before the first comment. There might not always
    // be whitespace, such as the (rather unusual) case of `/* meow */x = a: 3`.
    if matches!(it.peek(), Some(NodeOrToken::Token(token)) if token.kind() == SyntaxKind::TOKEN_WHITESPACE)
    {
        it.next();
    }

    let comments = it.map_while(|element| match element {
        NodeOrToken::Token(token) => {
            match token.kind() {
                // Map the tokens we're interested in to our internal token type.
                SyntaxKind::TOKEN_COMMENT => Some(DocToken::Comment(token.text().to_owned())),
                SyntaxKind::TOKEN_WHITESPACE => {
                    Some(DocToken::Whitespace(token.text().to_owned()))
                }
                // If we hit a different token type, we know we've gone past relevant
                // comments and should stop.
                _ => None,
            }
        }
        // If we hit a node entry we've definitely gone past comments that would be related
        // to this node and we should retreat.
        _ => None,
    });

    // For the curious: `into_iter()` here consumes the binding, producing an owned
    // iterator, which lets us avoid making the original binding mutable. We don't reuse it
    // later, so this is a cute way to handle it, though there's probably a better way we
    // just can't remember.
    Some(cleanup_comments(&mut comments.into_iter())).filter(|c| !c.is_empty())
}

/// Get the docs for a function in the given file path at the given file position, and
/// return it as a C string pointer.
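///
/// Returns a null pointer when no documentation could be found. A non-null result must be
/// freed with `lixdoc_free_string`.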
#[no_mangle]
pub extern "C" fn lixdoc_get_function_docs(
    filename: *const c_char,
    line: usize,
    col: usize,
) -> *const c_char {
    let fname = unsafe { CStr::from_ptr(filename) };
    fname
        .to_str()
        .ok()
        .and_then(|f| {
            panic::catch_unwind(|| get_function_docs(f, line, col))
                .map_err(|e| {
                    eprintln!("panic!! {:#?}", e);
                    e
                })
                .ok()
        })
        .flatten()
        .and_then(|s| CString::new(s).ok())
        .map(|s| s.into_raw() as *const c_char)
        .unwrap_or(ptr::null())
}

/// Call this to free a string from `lixdoc_get_function_docs`.
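///
/// Passing any pointer not obtained from that function is undefined behaviour, as this
/// reconstructs a `CString` from the raw pointer.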
#[no_mangle]
pub extern "C" fn lixdoc_free_string(s: *const c_char) {
    unsafe {
        // Cast note: this cast is turning something that was cast to const back to mut.
        drop(CString::from_raw(s as *mut c_char));
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_line_conversion() {
        let fakefile = "abc\rdef\r\nghi";
        assert_eq!(convert_endings(fakefile), "abc\ndef\nghi");
    }

    #[test]
    fn test_bytepos() {
        let fakefile = "abc\ndef\nghi";
        assert_eq!(find_pos(fakefile, 2, 2), 5);
    }

    #[test]
    fn test_bytepos_unusual() {
        let fakefile = convert_endings("abc\rdef\r\nghi");
        assert_eq!(find_pos(&fakefile, 2, 2), 5);
        assert_eq!(find_pos(&fakefile, 3, 2), 9);
    }

    /// This test checks that we correctly resolve byte positions even when they are
    /// inconsistent with character positions.
    #[test]
    fn test_bytepos_cursed() {
        let fakefile = "hello\nwórld";
        // Try to find the position of the `r` in `wórld`, which will be wrong if we don't
        // handle UTF-8 properly.
        let pos = find_pos(fakefile, 2, 4);
        dbg!(&fakefile[pos..]);
        assert_eq!(pos, 9);
    }

    #[test]
    fn test_comment_stripping() {
        let ex1 = [DocToken::Comment(
            "/* blah blah blah\n foooo baaar\n blah */".to_string(),
        )];
        assert_eq!(
            cleanup_comments(&mut ex1.into_iter()),
            "blah blah blah\n foooo baaar\n blah"
        );

        let ex2 = ["# a1", "#  a2", "# aa"]
            .into_iter()
            .map(|s| DocToken::Comment(s.to_string()));
        assert_eq!(cleanup_comments(&mut ex2.into_iter()), "aa\n\n a2\n\na1");
    }
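
    // A small extra check, not in the original suite, pinning down the `has_empty_line`
    // semantics: only whitespace tokens containing at least two newlines count.
    #[test]
    fn test_has_empty_line() {
        assert!(has_empty_line(&DocToken::Whitespace("\n  \n".to_string())));
        assert!(!has_empty_line(&DocToken::Whitespace("\n".to_string())));
        assert!(!has_empty_line(&DocToken::Comment("/* \n\n */".to_string())));
    }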

    #[test]
    fn test_dedent() {
        let ex1 = "a\n b\n c\n d";
        assert_eq!(dedent_comment(ex1), ex1);
        let ex2 = "a\nb\nc";
        assert_eq!(dedent_comment(ex2), ex2);
        let ex3 = " a\n b\n\n  c";
        assert_eq!(dedent_comment(ex3), "a\nb\n\n c");
    }

    #[test]
    fn test_single_line_comment_stripping() {
        let ex1 = " * a";
        let ex2 = " # a";
        let ex3 = " a";
        let ex4 = " *";
        assert_eq!(cleanup_single_line(ex1), " a");
        assert_eq!(cleanup_single_line(ex2), " a");
        assert_eq!(cleanup_single_line(ex3), ex3);
        assert_eq!(cleanup_single_line(ex4), "");
    }

    #[test]
    fn test_single_line_retains_bold_headings() {
        let ex1 = " **Foo**:";
        assert_eq!(cleanup_single_line(ex1), ex1);
    }

    // TODO: Next CL
    //#[test]
    //fn comment_test_complex() {
    //    let testcase = r#"
    //    rec {
    //        /*
    //        Hello
    //        23
    //        This is a comment.
    //        this is another comment.
    //        and this is a third comment.
    //        Way
    //        go
    //        */
    //        meow = { g }: {a, b ? 4, ...}: g: c: 5;
    //        # And another comment.
    //        cat = 34;
    //        # inner layer.
    //        "inner-layer" = outer: meow;
    //    }
    //    "#;
    //    // Need to find the location of the lambda, we do a quick hack.
    //    let location = dbg!(testcase.find("{ g }").unwrap() as u32);
    //
    //    //get_function_docs(filename, line, col)
    //}
}