diff --git a/.gitignore b/.gitignore
index 6c17391b..316bdc4e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,5 @@
 # IntelliJ IDE users
 .idea
 dist
+
+.DS_Store
diff --git a/Cargo.lock b/Cargo.lock
index aabf18f0..54ec8c80 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3756,6 +3756,7 @@ dependencies = [
  "enum-iterator",
  "reqwest 0.12.15",
  "serde",
+ "serde_json",
 ]
 
 [[package]]
diff --git a/crates/xtask/Cargo.toml b/crates/xtask/Cargo.toml
index d53d2c2a..3fdc7d09 100644
--- a/crates/xtask/Cargo.toml
+++ b/crates/xtask/Cargo.toml
@@ -13,6 +13,7 @@
 clap.workspace = true
 enum-iterator.workspace = true
 reqwest = { version = "0.12.9", features = ["blocking", "json"] }
 serde.workspace = true
+serde_json.workspace = true
 convert_case.workspace = true
 camino.workspace = true
diff --git a/crates/xtask/src/download_regression_tests.rs b/crates/xtask/src/download_regression_tests.rs
new file mode 100644
index 00000000..d3dad52e
--- /dev/null
+++ b/crates/xtask/src/download_regression_tests.rs
@@ -0,0 +1,140 @@
+use anyhow::{bail, Result};
+use camino::Utf8PathBuf;
+use std::fs::{create_dir_all, remove_dir_all, File};
+use std::io::{BufRead, Cursor, Write};
+use std::process::Command;
+
+/// Downloads the Postgres regression-test SQL files into the parser
+/// test-data directory, preprocessing each file so it is plain SQL.
+pub(crate) fn download_regression_tests() -> Result<()> {
+    let target_dir = Utf8PathBuf::from("crates/squawk_parser/tests/data/regression_suite");
+
+    // Start from a clean slate so files removed upstream don't linger.
+    if target_dir.exists() {
+        println!("Cleaning target directory: {:?}", target_dir);
+        remove_dir_all(&target_dir)?;
+    }
+
+    create_dir_all(&target_dir)?;
+
+    let urls = fetch_download_urls()?;
+    let total_files = urls.len();
+
+    for (index, url) in urls.iter().enumerate() {
+        // Download URLs always end in the file name.
+        let filename = url.split('/').last().unwrap();
+        let filepath = target_dir.join(filename);
+
+        println!(
+            "[{}/{}] Downloading {}...",
+            index + 1,
+            total_files,
+            filename
+        );
+
+        let output = Command::new("curl").args(["-s", url]).output()?;
+
+        if !output.status.success() {
+            let error_msg = String::from_utf8_lossy(&output.stderr);
+            bail!("Failed to download '{}': {}", url, error_msg);
+        }
+
+        let mut processed_content = Vec::new();
+        let cursor = Cursor::new(&output.stdout);
+
+        // Report (but skip) files that fail preprocessing instead of
+        // aborting the whole download run.
+        if let Err(e) = preprocess_sql(cursor, &mut processed_content) {
+            eprintln!("Error: Failed to process file: {}", e);
+            continue;
+        }
+
+        let mut dest = File::create(&filepath)?;
+        dest.write_all(&processed_content)?;
+    }
+
+    Ok(())
+}
+
+/// Queries the GitHub API for the Postgres regression-suite directory and
+/// returns the download URLs of all `.sql` files in it.
+fn fetch_download_urls() -> Result<Vec<String>> {
+    println!("Fetching SQL file URLs...");
+    let output = Command::new("gh")
+        .args([
+            "api",
+            "-H",
+            "Accept: application/vnd.github+json",
+            "/repos/postgres/postgres/contents/src/test/regress/sql",
+        ])
+        .output()?;
+
+    if !output.status.success() {
+        bail!(
+            "Failed to fetch SQL files: {}",
+            String::from_utf8_lossy(&output.stderr)
+        );
+    }
+
+    let json_str = String::from_utf8(output.stdout)?;
+    let files: Vec<serde_json::Value> = serde_json::from_str(&json_str)?;
+
+    // Extract download URLs for SQL files.
+    let urls: Vec<String> = files
+        .into_iter()
+        .filter(|file| {
+            file["name"]
+                .as_str()
+                .map(|name| name.ends_with(".sql"))
+                .unwrap_or(false)
+        })
+        .filter_map(|file| file["download_url"].as_str().map(String::from))
+        .collect();
+
+    if urls.is_empty() {
+        bail!("No SQL files found");
+    }
+
+    Ok(urls)
+}
+
+/// Strips psql meta-commands and `COPY ... FROM stdin` data blocks from a
+/// regression-test SQL file so the remaining statements are plain SQL.
+fn preprocess_sql<R: BufRead, W: Write>(source: R, mut dest: W) -> Result<()> {
+    let mut skipping_copy_block = false;
+
+    for line in source.lines() {
+        let mut line = line?;
+
+        // Detect the start of a COPY block
+        if line.starts_with("COPY ") && line.to_lowercase().contains("from stdin") {
+            skipping_copy_block = true;
+            continue;
+        }
+
+        // Detect the end of the COPY block
+        if skipping_copy_block && (line.starts_with("\\.") || line.is_empty()) {
+            skipping_copy_block = false;
+            continue;
+        }
+
+        // Skip lines if inside a COPY block
+        if skipping_copy_block {
+            continue;
+        }
+
+        if line.starts_with('\\') {
+            // Skip psql meta-commands (for now)
+            continue;
+        }
+
+        // replace "\gset" with ";" so the statement terminates normally
+        if line.contains("\\gset") {
+            line = line.replace("\\gset", ";");
+        }
+
+        writeln!(dest, "{}", line)?;
+    }
+
+    Ok(())
+}
diff --git a/crates/xtask/src/main.rs b/crates/xtask/src/main.rs
index c0c2264c..a67246d1 100644
--- a/crates/xtask/src/main.rs
+++ b/crates/xtask/src/main.rs
@@ -5,6 +5,7 @@ use generate_keywords::generate_keywords;
 use new_rule::new_lint;
 use sync_kwlist::sync_kwlist;
 
+mod download_regression_tests;
 mod generate_keywords;
 mod new_rule;
 mod path_util;
@@ -18,6 +19,8 @@ enum TaskName {
     SyncKwlist,
     #[command(long_about = "Create a new linter rule")]
     NewRule(NewRuleArgs),
+    #[command(long_about = "Download and process regression tests from Postgres")]
+    DownloadRegressionTests,
 }
 
 #[derive(Args, Debug)]
@@ -40,5 +43,6 @@ fn main() -> Result<()> {
         TaskName::GenerateKeywords => generate_keywords(),
         TaskName::SyncKwlist => sync_kwlist(),
         TaskName::NewRule(args) => new_lint(args),
+        TaskName::DownloadRegressionTests => download_regression_tests::download_regression_tests(),
     }
 }