2 changes: 2 additions & 0 deletions .gitignore
@@ -7,3 +7,5 @@
# IntelliJ IDE users
.idea
dist

.DS_Store
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions crates/xtask/Cargo.toml
@@ -13,6 +13,7 @@ clap.workspace = true
enum-iterator.workspace = true
reqwest = { version = "0.12.9", features = ["blocking", "json"] }
serde.workspace = true
serde_json.workspace = true
convert_case.workspace = true
camino.workspace = true

137 changes: 137 additions & 0 deletions crates/xtask/src/download_regression_tests.rs
@@ -0,0 +1,137 @@
use anyhow::{bail, Result};
use camino::Utf8PathBuf;
use std::fs::{create_dir_all, remove_dir_all, File};
use std::io::{BufRead, Cursor, Write};
use std::process::Command;

pub(crate) fn download_regression_tests() -> Result<()> {
    let target_dir = Utf8PathBuf::from("crates/squawk_parser/tests/data/regression_suite");

    if target_dir.exists() {
        println!("Cleaning target directory: {:?}", target_dir);
        remove_dir_all(&target_dir)?;
    }

    create_dir_all(&target_dir)?;

    let urls = fetch_download_urls()?;
    let total_files = urls.len();

    for (index, url) in urls.iter().enumerate() {
        let filename = url.split('/').last().unwrap();
        let filepath = target_dir.join(filename);

        println!(
            "[{}/{}] Downloading {}... ",
            index + 1,
            total_files,
            filename
        );

        let output = Command::new("curl").args(["-s", url]).output()?;

        if !output.status.success() {
            let error_msg = String::from_utf8_lossy(&output.stderr);
            bail!("Failed to download '{}': {}", url, error_msg);
        }

        let mut processed_content = Vec::new();
        let cursor = Cursor::new(&output.stdout);

        if let Err(e) = preprocess_sql(cursor, &mut processed_content) {
            eprintln!("Error: Failed to process file: {}", e);
            continue;
        }

        let mut dest = File::create(&filepath)?;
        dest.write_all(&processed_content)?;
    }

    Ok(())
}

fn fetch_download_urls() -> Result<Vec<String>> {
    // Fetch list of SQL file URLs
    println!("Fetching SQL file URLs...");
    let output = Command::new("gh")
        .args([
            "api",
            "-H",
            "Accept: application/vnd.github+json",
            "/repos/postgres/postgres/contents/src/test/regress/sql",
        ])
        .output()?;

    if !output.status.success() {
        bail!(
            "Failed to fetch SQL files: {}",
            String::from_utf8_lossy(&output.stderr)
        );
    }

    let json_str = String::from_utf8(output.stdout)?;
    let files: Vec<serde_json::Value> = serde_json::from_str(&json_str)?;

    // Extract download URLs for SQL files
    let urls: Vec<String> = files
        .into_iter()
        .filter(|file| {
            file["name"]
                .as_str()
                .map(|name| name.ends_with(".sql"))
                .unwrap_or(false)
        })
        .filter_map(|file| file["download_url"].as_str().map(String::from))
        .collect();

    if urls.is_empty() {
        bail!("No SQL files found");
    }

    Ok(urls)
}

fn preprocess_sql<R: BufRead, W: Write>(source: R, mut dest: W) -> Result<()> {
    let mut skipping_copy_block = false;

    for line in source.lines() {
        let mut line = line?;

        // Detect the start of the COPY block
Owner: what does a copy block look like? curious what it has that we don't support parsing

Contributor Author: the regression tests are executed via psql, and they use COPY ... FROM STDIN to load data into test tables:

COPY bitwise_test FROM STDIN NULL 'null';
1	1	1	1	1	B0101
3	3	3	null	2	B0100
7	7	7	3	4	B1100
\.
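
A minimal sanity check (not part of this PR) of how preprocess_sql below handles such a block — hypothetical input, assuming the function signature in this diff:

let input = b"COPY bitwise_test FROM STDIN NULL 'null';\n1\t1\t1\t1\t1\tB0101\n\\.\nSELECT 1;\n";
let mut out = Vec::new();
preprocess_sql(std::io::Cursor::new(&input[..]), &mut out).unwrap();
// Only the SELECT survives; the COPY header and its payload are stripped.
assert_eq!(String::from_utf8(out).unwrap(), "SELECT 1;\n");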

        if line.starts_with("COPY ") && line.to_lowercase().contains("from stdin") {
            skipping_copy_block = true;
            continue;
        }

        // Detect the end of the COPY block
        if skipping_copy_block && (line.starts_with("\\.") || line.is_empty()) {
            skipping_copy_block = false;
            continue;
        }

        // Skip lines if inside a COPY block
        if skipping_copy_block {
            continue;
        }

        if line.starts_with('\\') {
            // Skip psql meta-commands (for now)
            continue;
        }

        // replace "\gset" with ";"
        if line.contains("\\gset") {
            line = line.replace("\\gset", ";");
        }

        // Write the cleaned line
        writeln!(dest, "{}", line)?;
    }

    Ok(())
}
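
An aside: the xtask crate already pulls in reqwest with the "blocking" feature (see the Cargo.toml change above), so the curl subprocess could in principle be replaced with an in-process request. A minimal sketch under that assumption — not what this PR ships:

fn fetch_sql_file(url: &str) -> anyhow::Result<Vec<u8>> {
    // Hypothetical alternative to `curl -s <url>`: fetch with reqwest's
    // blocking client and turn non-2xx statuses into errors.
    let resp = reqwest::blocking::get(url)?.error_for_status()?;
    Ok(resp.bytes()?.to_vec())
}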
4 changes: 4 additions & 0 deletions crates/xtask/src/main.rs
@@ -5,6 +5,7 @@ use generate_keywords::generate_keywords;
use new_rule::new_lint;
use sync_kwlist::sync_kwlist;

mod download_regression_tests;
mod generate_keywords;
mod new_rule;
mod path_util;
@@ -18,6 +19,8 @@ enum TaskName {
    SyncKwlist,
    #[command(long_about = "Create a new linter rule")]
    NewRule(NewRuleArgs),
    #[command(long_about = "Download and process regression tests from Postgres")]
    DownloadRegressionTests,
}

#[derive(Args, Debug)]
@@ -40,5 +43,6 @@ fn main() -> Result<()> {
        TaskName::GenerateKeywords => generate_keywords(),
        TaskName::SyncKwlist => sync_kwlist(),
        TaskName::NewRule(args) => new_lint(args),
        TaskName::DownloadRegressionTests => download_regression_tests::download_regression_tests(),
    }
}
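
With the subcommand wired up as above, and assuming clap's default kebab-case naming for derive-based subcommands, the new task would be invoked as:

cargo xtask download-regression-tests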