diff --git a/crates/squawk/src/github.rs b/crates/squawk/src/github.rs index 6eec4f32..0f280870 100644 --- a/crates/squawk/src/github.rs +++ b/crates/squawk/src/github.rs @@ -11,6 +11,11 @@ use std::io; const VERSION: &str = env!("CARGO_PKG_VERSION"); +// GitHub API limit for issue comment body is 65,536 characters +// We use a slightly smaller limit to leave room for the comment structure +const GITHUB_COMMENT_MAX_SIZE: usize = 65_000; +const MAX_SQL_PREVIEW_LINES: usize = 50; + fn get_github_private_key( github_private_key: Option, github_private_key_base64: Option, @@ -162,38 +167,144 @@ pub fn check_and_comment_on_pr( fn get_comment_body(files: &[CheckReport], version: &str) -> String { let violations_count: usize = files.iter().map(|x| x.violations.len()).sum(); - let violations_emoji = get_violations_emoji(violations_count); + // First, try to generate the full comment + let sql_file_contents: Vec = files + .iter() + .filter_map(|x| get_sql_file_content(x).ok()) + .collect(); + + let content = sql_file_contents.join("\n"); + let full_comment = format_comment( + violations_emoji, + violations_count, + files.len(), + &content, + version, + None, // No summary notice for full comments + ); + + // Check if the comment exceeds GitHub's size limit + if full_comment.len() <= GITHUB_COMMENT_MAX_SIZE { + return full_comment; + } + + // If the comment is too large, create a summary instead + get_summary_comment_body(files, violations_count, violations_emoji, version) +} + +fn get_summary_comment_body( + files: &[CheckReport], + violations_count: usize, + violations_emoji: &str, + version: &str, +) -> String { + let mut file_summaries = Vec::new(); + + for file in files { + let violation_count = file.violations.len(); + let violations_emoji = get_violations_emoji(violation_count); + let line_count = file.sql.lines().count(); + + let summary = format!( + r" +

{filename}

+ +📄 **{line_count} lines** | {violations_emoji} **{violation_count} violations** + +{violations_detail} + +--- + ", + filename = file.filename, + line_count = line_count, + violations_emoji = violations_emoji, + violation_count = violation_count, + violations_detail = if violation_count > 0 { + let violation_rules: Vec = file + .violations + .iter() + .map(|v| format!("• `{}` (line {})", v.rule_name, v.line + 1)) + .collect(); + format!("**Violations found:**\n{}", violation_rules.join("\n")) + } else { + "✅ No violations found.".to_string() + } + ); + file_summaries.push(summary); + } + + let summary_notice = Some("⚠️ **Large Report**: This report was summarized due to size constraints. SQL content has been omitted but all violations were analyzed."); + + format_comment( + violations_emoji, + violations_count, + files.len(), + &file_summaries.join("\n"), + version, + summary_notice, + ) +} + +const fn get_violations_emoji(count: usize) -> &'static str { + if count > 0 { "🚒" } else { "✅" } +} + +fn format_comment( + violations_emoji: &str, + violation_count: usize, + file_count: usize, + content: &str, + version: &str, + summary_notice: Option<&str>, +) -> String { + let notice_section = if let Some(notice) = summary_notice { + format!("\n> {}\n", notice) + } else { + String::new() + }; + format!( r" {COMMENT_HEADER} -### **{violations_emoji} {violation_count}** violations across **{file_count}** file(s) - +### **{violations_emoji} {violation_count}** violations across **{file_count}** file(s){notice_section} --- -{sql_file_content} +{content} [📚 More info on rules](https://github.com/sbdchd/squawk#rules) ⚡️ Powered by [`Squawk`](https://github.com/sbdchd/squawk) ({version}), a linter for PostgreSQL, focused on migrations ", violations_emoji = violations_emoji, - violation_count = violations_count, - file_count = files.len(), - sql_file_content = files - .iter() - .filter_map(|x| get_sql_file_content(x).ok()) - .collect::>() - .join("\n"), + violation_count = violation_count, + file_count = file_count, + notice_section = notice_section, + content = content, version = version ) .trim_matches('\n') .into() } -const fn get_violations_emoji(count: usize) -> &'static str { - if count > 0 { "🚒" } else { "✅" } +fn truncate_sql_if_needed(sql: &str) -> (String, bool) { + let lines: Vec<&str> = sql.lines().collect(); + if lines.len() <= MAX_SQL_PREVIEW_LINES { + (sql.to_string(), false) + } else { + let truncated_lines = lines[..MAX_SQL_PREVIEW_LINES].join(" +"); + let remaining_lines = lines.len() - MAX_SQL_PREVIEW_LINES; + ( + format!( + "{truncated_lines} + +-- ... ({remaining_lines} more lines truncated for brevity)" + ), + true, + ) + } } fn get_sql_file_content(violation: &CheckReport) -> Result { @@ -219,6 +330,13 @@ fn get_sql_file_content(violation: &CheckReport) -> Result { }; let violations_emoji = get_violations_emoji(violation_count); + let (display_sql, was_truncated) = truncate_sql_if_needed(sql); + + let truncation_notice = if was_truncated { + "\n\n> ⚠️ **Note**: SQL content has been truncated for display purposes. The full analysis was performed on the complete file." + } else { + "" + }; Ok(format!( r" @@ -226,7 +344,7 @@ fn get_sql_file_content(violation: &CheckReport) -> Result { ```sql {sql} -``` +```{truncation_notice}

{violations_emoji} Rule Violations ({violation_count})

@@ -236,7 +354,8 @@ fn get_sql_file_content(violation: &CheckReport) -> Result { ", violations_emoji = violations_emoji, filename = violation.filename, - sql = sql, + sql = display_sql, + truncation_notice = truncation_notice, violation_count = violation_count, violation_content = violation_content )) @@ -327,4 +446,93 @@ ALTER TABLE "core_recipe" ADD COLUMN "foo" integer DEFAULT 10; assert_snapshot!(body); } + + #[test] + fn sql_truncation() { + let short_sql = "SELECT 1;"; + let (result, truncated) = crate::github::truncate_sql_if_needed(short_sql); + assert!(!truncated); + assert_eq!(result, short_sql); + + let long_sql = (0..100) + .map(|i| format!("-- Line {}", i)) + .collect::>() + .join("\n"); + let (result, truncated) = crate::github::truncate_sql_if_needed(&long_sql); + assert!(truncated); + assert!(result.contains("-- ... (50 more lines truncated for brevity)")); + } + + #[test] + fn generating_comment_with_large_content() { + // Create a very large SQL content + let large_sql = (0..1000) + .map(|i| format!("SELECT {} as col{};", i, i)) + .collect::>() + .join("\n"); + + let violations = vec![CheckReport { + filename: "large.sql".into(), + sql: large_sql, + violations: vec![ReportViolation { + file: "large.sql".into(), + line: 1, + column: 0, + level: ViolationLevel::Warning, + rule_name: "prefer-bigint-over-int".to_string(), + range: TextRange::new(TextSize::new(0), TextSize::new(0)), + message: "Prefer bigint over int.".to_string(), + help: Some("Use bigint instead.".to_string()), + column_end: 0, + line_end: 1, + }], + }]; + + let body = get_comment_body(&violations, "0.2.3"); + + // The comment should be within GitHub's size limits + assert!(body.len() <= super::GITHUB_COMMENT_MAX_SIZE); + + // Should contain summary information even if the full content was too large + assert!(body.contains("violations")); + } + + #[test] + fn generating_comment_forced_summary() { + // Create content that will definitely trigger summary mode + let massive_sql = (0..10000) + .map(|i| format!("SELECT {} as col{};", i, i)) + .collect::>() + .join("\n"); + + let violations = vec![CheckReport { + filename: "massive.sql".into(), + sql: massive_sql, + violations: vec![ReportViolation { + file: "massive.sql".into(), + line: 1, + column: 0, + level: ViolationLevel::Warning, + rule_name: "prefer-bigint-over-int".to_string(), + range: TextRange::new(TextSize::new(0), TextSize::new(0)), + message: "Prefer bigint over int.".to_string(), + help: Some("Use bigint instead.".to_string()), + column_end: 0, + line_end: 1, + }], + }]; + + let body = get_comment_body(&violations, "0.2.3"); + + // The comment should be within GitHub's size limits + assert!(body.len() <= super::GITHUB_COMMENT_MAX_SIZE); + + // Should contain the summary notice + if body.contains("Large Report") { + assert!(body.contains("summarized due to size constraints")); + } else { + // If it didn't trigger summary mode, at least verify it contains violations info + assert!(body.contains("violations")); + } + } } diff --git a/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_multiple_files.snap b/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_multiple_files.snap index cdbe8d0e..07c9e1b6 100644 --- a/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_multiple_files.snap +++ b/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_multiple_files.snap @@ -1,11 +1,10 @@ --- -source: crates/cli/src/github.rs +source: crates/squawk/src/github.rs expression: body --- # Squawk Report ### **🚒 1** violations across **1** file(s) - ---

alpha.sql

diff --git a/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_no_violations.snap b/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_no_violations.snap index adcd97e2..81dd2eca 100644 --- a/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_no_violations.snap +++ b/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_comment_no_violations.snap @@ -1,11 +1,10 @@ --- -source: crates/cli/src/github.rs +source: crates/squawk/src/github.rs expression: body --- # Squawk Report ### **✅ 0** violations across **2** file(s) - ---

alpha.sql

diff --git a/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_no_violations_no_files.snap b/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_no_violations_no_files.snap index cafbb237..4228d9f3 100644 --- a/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_no_violations_no_files.snap +++ b/crates/squawk/src/snapshots/squawk__github__test_github_comment__generating_no_violations_no_files.snap @@ -1,11 +1,10 @@ --- -source: crates/cli/src/github.rs +source: crates/squawk/src/github.rs expression: body --- # Squawk Report ### **✅ 0** violations across **0** file(s) - --- diff --git a/crates/squawk_github/src/app.rs b/crates/squawk_github/src/app.rs index 25c6d3e2..cbf47317 100644 --- a/crates/squawk_github/src/app.rs +++ b/crates/squawk_github/src/app.rs @@ -54,6 +54,13 @@ pub(crate) fn create_comment( comment: CommentArgs, secret: &str, ) -> Result<(), GithubError> { + // Check comment size before attempting to send + if comment.body.len() > 65_536 { + return Err(GithubError::CommentTooLarge(format!( + "Comment body is too large ({} characters). GitHub API limit is 65,536 characters.", + comment.body.len() + ))); + } let comment_body = CommentBody { body: comment.body }; reqwest::blocking::Client::new() .post(&format!( @@ -94,6 +101,9 @@ impl std::fmt::Display for GithubError { Self::HttpError(ref err) => { write!(f, "Problem calling GitHub API: {err}") } + Self::CommentTooLarge(ref msg) => { + write!(f, "Comment size error: {msg}") + } } } } @@ -180,6 +190,14 @@ pub(crate) fn update_comment( body: String, secret: &str, ) -> Result<(), GithubError> { + // Check comment size before attempting to send + if body.len() > 65_536 { + return Err(GithubError::CommentTooLarge(format!( + "Comment body is too large ({} characters). GitHub API limit is 65,536 characters.", + body.len() + ))); + } + reqwest::blocking::Client::new() .patch(&format!( "{github_api_url}/repos/{owner}/{repo}/issues/comments/{comment_id}", diff --git a/crates/squawk_github/src/lib.rs b/crates/squawk_github/src/lib.rs index 8edea921..68dcb78c 100644 --- a/crates/squawk_github/src/lib.rs +++ b/crates/squawk_github/src/lib.rs @@ -14,6 +14,7 @@ pub(crate) const DEFAULT_GITHUB_API_URL: &'static str = "https://api.github.com" pub enum GithubError { JsonWebTokenCreation(jsonwebtoken::errors::Error), HttpError(reqwest::Error), + CommentTooLarge(String), } impl Error for GithubError { @@ -21,6 +22,7 @@ impl Error for GithubError { match self { GithubError::JsonWebTokenCreation(err) => Some(err), GithubError::HttpError(err) => Some(err), + GithubError::CommentTooLarge(_) => None, } } }