diff --git a/CHANGELOG.md b/CHANGELOG.md index 536031b7..a587c0f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 3. Invalid multiline strings are left untouched. - All keywords are now set to lowercase. This includes reserved words (like `begin` and `const`) but also includes non-reserved words (like `absolute` and `override`) when used in the context that makes them special. +- Added formatting for the interior of inline comments + - If no whitespace exists after the `//` (or `///`), one space is added. + - "separator" lines are exempt, defined as a comment containing only 10 or more repetitions of a + non-alphanumeric character. + - Trailing whitespace at the end of the comment is trimmed. +- All compiler directives are now set to uppercase. This includes switch directives (e.g. `{$O+}`), parameter directives + (e.g. `{$HINTS on}`), and conditional directives (e.g. `{$IFDEF Foo}`). ## [0.6.0] - 2025-08-25 diff --git a/core/CHANGELOG.md b/core/CHANGELOG.md index 35147f9c..a1176d6f 100644 --- a/core/CHANGELOG.md +++ b/core/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added setting to enable internal formatting of multiline strings: `OptimisingLineFormatterSettings.format_multiline_strings`. - Added `LowercaseKeywords` to convert instances of keywords to lowercase. +- Added `CommentFormatter` to format the interior of comments. ### Changed diff --git a/core/src/rules/comment_contents.rs b/core/src/rules/comment_contents.rs new file mode 100644 index 00000000..a1486cc7 --- /dev/null +++ b/core/src/rules/comment_contents.rs @@ -0,0 +1,339 @@ +use itertools::Itertools; + +use crate::prelude::*; + +pub struct CommentFormatter {} + +fn format_line_comment(tok: &mut Token) { + fn comment_is_separator(comment: &str) -> bool { + let comment = comment.trim_ascii_end(); + comment.len() >= 10 + && comment.chars().next().is_some_and(|b| !b.is_alphanumeric()) + && comment.chars().all_equal() + } + + let content = tok.get_content(); + let mut new_content: Option = None; + let Some(mut comment) = content.strip_prefix("//") else { + return; + }; + + // doc comments have an extra slash + comment = comment.strip_prefix('/').unwrap_or(comment); + + if comment + .bytes() + .next() + .is_some_and(|b| !b.is_ascii_whitespace()) + && !comment_is_separator(comment) + { + let mut str = String::with_capacity(content.len() + 1); + str.push_str(&content[..content.len() - comment.len()]); + str.push(' '); + str.push_str(comment); + new_content = Some(str); + } + + let trimmed = content.trim_ascii_end(); + if trimmed.len() != content.len() { + let new_content = new_content.get_or_insert_with(|| content.to_string()); + new_content.truncate(new_content.trim_ascii_end().len()); + } + + if let Some(new_content) = new_content { + tok.set_content(new_content); + } +} + +fn format_compiler_directive(tok: &mut Token) { + let content = tok.get_content(); + + let Some(stripped) = content + .strip_prefix("{$") + .or_else(|| content.strip_prefix("(*$")) + else { + return; + }; + + enum State { + Before, + AfterPlusMinus, + AfterDigit, + AfterComma, + AfterLetter, + AfterWord, + } + let mut is_switch = false; + let mut state = State::Before; + let mut directive_len = 0; + for b in stripped.bytes() { + match (state, b) { + (State::Before | State::AfterComma, b'a'..=b'z' | b'A'..=b'Z') => { + state = State::AfterLetter; + } + + (State::AfterLetter, b'+' | b'-') => { + state = State::AfterPlusMinus; + is_switch = true; + } + + (State::AfterPlusMinus | State::AfterDigit, b',') => { + state = State::AfterComma; + } + + (State::AfterLetter | State::AfterDigit, b'0'..=b'9') => { + state = State::AfterDigit; + is_switch = true; + } + + ( + State::AfterLetter | State::AfterWord, + b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_', + ) if !is_switch => { + state = State::AfterWord; + } + + // A comma after a letter is only valid in the `AfterWord` case + (State::AfterLetter, b',') => return, + + // In a switch directive, commas and letters must be followed by something else + (State::AfterComma | State::AfterLetter, _) => return, + + _ => break, + }; + + directive_len += 1; + } + + let directive = &stripped[..directive_len]; + + if directive.bytes().any(|b| b.is_ascii_lowercase()) { + let mut str = String::with_capacity(content.len()); + let prefix = &content[..content.len() - stripped.len()]; + str.push_str(prefix); + str.extend(directive.chars().map(|c| c.to_ascii_uppercase())); + let rest = &stripped[directive.len()..]; + str.push_str(rest); + + tok.set_content(str); + } +} + +impl LogicalLineFileFormatter for CommentFormatter { + fn format(&self, formatted_tokens: &mut FormattedTokens<'_>, _input: &[LogicalLine]) { + for (tok, _) in formatted_tokens.tokens_mut() { + let Ok(tok) = tok else { continue }; + match tok.get_token_type() { + TokenType::CompilerDirective | TokenType::ConditionalDirective(_) => { + format_compiler_directive(tok) + } + TokenType::Comment( + CommentKind::InlineBlock + | CommentKind::IndividualBlock + | CommentKind::MultilineBlock, + ) => { + // do nothing, currently + } + TokenType::Comment(CommentKind::InlineLine | CommentKind::IndividualLine) => { + format_line_comment(tok) + } + _ => {} + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn formatter() -> Formatter { + Formatter::builder() + .lexer(DelphiLexer {}) + .parser(DelphiLogicalLineParser {}) + .token_ignorer(FormattingToggler {}) + .file_formatter(CommentFormatter {}) + .reconstructor(default_test_reconstructor()) + .build() + } + + formatter_test_group!( + block_comments, + not_yet_supported = { + "{ a }", + "{ a }", + }, + ); + + formatter_test_group!( + line_comments, + no_space_at_start = { + "//a", + "// a", + }, + empty = { + "//", + "//", + }, + multiple_spaces_at_start_remain = { + "// a", + "// a", + }, + any_ascii_whitespace_at_start_remains = { + " + //\x09a + //\x0Cb + ", + " + //\x09a + //\x0Cb + ", + }, + ascii_whitespace_at_end_is_trimmed = { + "// a\u{A0}\x20\x09\x0C", + "// a\u{A0}", + }, + insert_at_start_and_trim_at_end = { + "//a ", + "// a", + }, + empty_doc = { + "///", + "///", + }, + no_space_at_start_doc = { + "///a", + "/// a", + }, + slash_after_doc = { + "////", + "/// /", + }, + separators_ignored = { + " + //---------- + //----------\x20\x09 + // ---------- + //-------------------- + //++++++++++ + //,,,,,,,,,, + //__________ + //########## + //[[[[[[[[[[ + ", + " + //---------- + //---------- + // ---------- + //-------------------- + //++++++++++ + //,,,,,,,,,, + //__________ + //########## + //[[[[[[[[[[ + ", + }, + not_quite_separators_not_ignored = { + " + //--------- + //---------+ + //++-+++++++ + //OOOOOOOOOO + ", + " + // --------- + // ---------+ + // ++-+++++++ + // OOOOOOOOOO + ", + }, + ); + + formatter_test_group!( + compiler_directives, + brace_style = { + "{$define foo}", + "{$DEFINE foo}", + }, + paren_star_style = { + "(*$define foo*)", + "(*$DEFINE foo*)", + }, + already_uppercase = { + "{$DEFINE foo}", + "{$DEFINE foo}", + }, + nested_style = { + "(*$message '}'*){$message '*)'}", + "(*$MESSAGE '}'*){$MESSAGE '*)'}", + }, + invalid_space_at_start = { + "(*$ define foo*)", + "(*$ define foo*)", + }, + no_space_after = { + "{$message''}", + "{$MESSAGE''}", + }, + switch_directives = { + "{$o+}{$r-}{$z2}{$a16}", + "{$O+}{$R-}{$Z2}{$A16}", + }, + batched_switch_directives = { + "{$o+,r-,b+,a+,a1,a2,a4,a8,a16,z1,z2,z4}{$a1,b+}", + "{$O+,R-,B+,A+,A1,A2,A4,A8,A16,Z1,Z2,Z4}{$A1,B+}", + }, + fake_batched_directives = { + "{$if,comment}", + "{$IF,comment}", + }, + unknown_directive_names = { + "{$asdf}{$fdsa}{$as_df}{$as09df}{$asdf09}{$zyxw}", + "{$ASDF}{$FDSA}{$AS_DF}{$AS09DF}{$ASDF09}{$ZYXW}", + }, + invalid_directive_names_ignored = { + "{$0asdf}{$,a}{$a,b}", + "{$0asdf}{$,a}{$a,b}", + }, + incomplete_switch_directive_ignored = { + "{$a}{$a+,b}{$a+,}", + "{$a}{$a+,b}{$a+,}", + }, + unusual_word_breaks = { + "{$a1a}{$a+!a}{$aa-a}{$a_b=a}", + "{$A1a}{$A+!a}{$AA-a}{$A_B=a}", + }, + ignored_tokens = { + "{$r+}{pasfmt off}{$r+}", + "{$R+}{pasfmt off}{$r+}", + }, + trailing_whitespace_ignored = { + "{$DEFINE foo }", + "{$DEFINE foo }", + }, + ); + + formatter_test_group!( + conditional_directives, + simple_expressions = { + "{$if foo}{$elseif bar}{$endif}", + "{$IF foo}{$ELSEIF bar}{$ENDIF}", + }, + complex_expressions_not_formatted = { + "{$if (foo>bar ) and true }", + "{$IF (foo>bar ) and true }", + }, + all_directives = { + "{$ifdef FOo}{$ifndef fOo}{$if fOo}{$elseif fOo}{$else}{$endif}{$ifend}", + "{$IFDEF FOo}{$IFNDEF fOo}{$IF fOo}{$ELSEIF fOo}{$ELSE}{$ENDIF}{$IFEND}", + }, + nested_directives_not_formatted = { + "{$if {$include foo.inc}}", + "{$IF {$include foo.inc}}", + }, + tricky_quoting = { + "{$if foo = '}'}{$if bar}", + "{$IF foo = '}'}{$IF bar}", + }, + ); +} diff --git a/core/src/rules/mod.rs b/core/src/rules/mod.rs index 1472765a..0424b290 100644 --- a/core/src/rules/mod.rs +++ b/core/src/rules/mod.rs @@ -1,3 +1,4 @@ +pub mod comment_contents; pub mod conditional_directive_consolidator; pub mod deindent_package_directives; pub mod eof_newline; @@ -8,6 +9,7 @@ pub mod lowercase_keywords; pub mod optimising_line_formatter; pub mod token_spacing; +pub use comment_contents::*; pub use conditional_directive_consolidator::*; pub use deindent_package_directives::*; pub use eof_newline::*; diff --git a/front-end/src/lib.rs b/front-end/src/lib.rs index bed2d258..19ec5996 100644 --- a/front-end/src/lib.rs +++ b/front-end/src/lib.rs @@ -323,6 +323,7 @@ pub fn make_formatter(config: &FormattingConfig) -> Formatter { .token_ignorer(IgnoreAsmIstructions {}) .file_formatter(TokenSpacing {}) .file_formatter(LowercaseKeywords {}) + .file_formatter(CommentFormatter {}) .line_formatter(FormatterSelector::new( |logical_line_type| match logical_line_type { LogicalLineType::Eof => Some(eof_newline_formatter),