Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ anstream = "1.0"
arrow = { workspace = true }
async-trait = "0.1"
bytes = { workspace = true }
clap = { version = "4.6.1", features = ["derive", "env", "color"] }
clap = { version = "4.6.1", features = ["derive", "env", "color", "string"] }
criterion = { workspace = true, features = ["html_reports"] }
datafusion = { workspace = true, default-features = true }
datafusion-common = { workspace = true, default-features = true }
Expand Down
9 changes: 5 additions & 4 deletions benchmarks/benches/sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,10 @@ fn handle_run(
name: &str,
) {
rt.block_on(async {
benchmark
let _ = benchmark
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are these changes (binding the result to `let _` and adding the trailing semicolon) needed?

.run(ctx, args.validate)
.await
.unwrap_or_else(|err| panic!("Failed to run benchmark {name}: {err:?}"))
.unwrap_or_else(|err| panic!("Failed to run benchmark {name}: {err:?}"));
});
}

Expand Down Expand Up @@ -205,7 +205,7 @@ fn handle_verify(
info!("Verifying benchmark {name} results ...");

rt.block_on(async {
benchmark
let _ = benchmark
.run(ctx, true)
.await
.unwrap_or_else(|err| panic!("Failed to run benchmark {name}: {err:?}"));
Expand Down Expand Up @@ -278,7 +278,8 @@ async fn load_benchmarks(
for path in paths {
debug!("Loading benchmark from {path}");

let benchmark = SqlBenchmark::new(ctx, &path, &*SQL_BENCHMARK_DIRECTORY).await?;
let benchmark =
SqlBenchmark::new(ctx, &path, &*SQL_BENCHMARK_DIRECTORY, None).await?;
let entries = benches
.entry(benchmark.group().to_string())
.or_insert(vec![]);
Expand Down
34 changes: 33 additions & 1 deletion benchmarks/sql_benchmarks/tpch/tpch.suite
Original file line number Diff line number Diff line change
@@ -1,16 +1,48 @@
name = "tpch"
description = "TPC-H SQL benchmarks"

# Query patterns control how numeric QUERY_ID values map to .benchmark files
# during discovery and command resolution. Use exactly one query-id token:
# - {QUERY_ID_PADDED}: two-digit ids, such as q01.benchmark
# - {QUERY_ID}: unpadded ids, such as query-1.benchmark
# If omitted, this defaults to q{QUERY_ID_PADDED}.benchmark.
query_pattern = "q{QUERY_ID_PADDED}.benchmark"

# Path replacements define path-like variables used while parsing benchmark
# files. Relative paths are resolved from this suite file's directory and then
# passed to SqlBenchmark's replacement mapping, so benchmark SQL can refer to
# values such as ${DATA_DIR}. For timed runs, the runner's --path/-p option
# overrides DATA_DIR.
[path_replacements]
DATA_DIR = "../../data"

[[options]]
name = "format"
short = "f"
env = "TPCH_FILE_TYPE"
default = "parquet"
values = ["parquet", "csv", "mem"]
help = "Selects the TPC-H data format."

[[options]]
name = "scale-factor"
short = "sf"
env = "BENCH_SIZE"
default = "1"
values = ["1", "10"]
help = "Selects the TPC-H scale factor."

[[examples]]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a minor aside, these example commands don't work yet:

andrewlamb@Andrews-MacBook-Pro-3:~/Software/datafusion$ ^C
andrewlamb@Andrews-MacBook-Pro-3:~/Software/datafusion$ cargo run --bin benchmark_runner -- run tpch 15 -f csv
    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.30s
     Running `target/debug/benchmark_runner run tpch 15 -f csv`
External error: error: unrecognized subcommand 'run'

Usage: benchmark_runner [COMMAND]

For more information, try '--help'.

I think you said they are coming in the next PR, so that is good.

command = "cargo run --release --bin benchmark_runner -- run tpch"
description = "Run all TPC-H queries with the default parquet SF1 configuration."

[[examples]]
command = "cargo run --release --bin benchmark_runner -- run tpch 15"
description = "Run TPC-H query 15 with the default parquet SF1 configuration."

[[examples]]
command = "cargo run --release --bin benchmark_runner -- run tpch 15 -f csv"
description = "Run TPC-H query 15 against CSV data."

[[examples]]
command = "cargo run --release --bin benchmark_runner -- run tpch 15 -sf 10"
description = "Run TPC-H query 15 at scale factor 10."
Loading
Loading