Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions bin/namescore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash

# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.
#
# namescore - Calculate the namescore (the percentage of column references
# that carry name information) of an EXPLAIN PLAN.
#
# All command-line arguments are forwarded unchanged to the
# materialize.cli.namescore Python module, which is launched through the
# pyactivate wrapper located next to this script.

script_dir="$(dirname "$0")"
exec "$script_dir"/pyactivate -m materialize.cli.namescore "$@"
94 changes: 94 additions & 0 deletions misc/python/materialize/cli/namescore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

"""Calculate the `namescore`---the percentage of column references with
name information---of an `EXPLAIN PLAN`. By default, runs on all SLT
files in $MZ_ROOT/test/sqllogictest."""

import argparse
import os
import re

from materialize import MZ_ROOT

# Directory that is walked to discover SLT files.
SLT_ROOT = MZ_ROOT / "test" / "sqllogictest"

# Matches a column reference as it appears in plan output: a `#`, one or
# more digits, and an optional `{name}` annotation (e.g. `#0` or `#0{a}`).
# Group 1 captures the `{name}` part and is None when the reference is
# unnamed. The pattern is compiled with re.VERBOSE, so the surrounding
# newlines inside the literal are ignored and the `#` must be escaped.
COLUMN_REF_RE = re.compile(
r"""
\#[0-9]+({[^}]+})?
""",
re.VERBOSE,
)


def find_slt_files() -> list[str]:
    """Find all .slt files in the $MZ_ROOT/test/sqllogictest directory.

    Returns:
        The paths of every file below `SLT_ROOT` (searched recursively)
        whose name ends in `.slt`. The list is sorted so that the result
        does not depend on the order in which the filesystem happens to
        list directory entries.
    """
    slt_files = [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(SLT_ROOT)
        for file in files
        if file.endswith(".slt")
    ]
    # os.walk yields entries in arbitrary order; sort for reproducible output.
    slt_files.sort()
    return slt_files


def namescore(filename: str) -> tuple[int, int]:
    """Count the column references in a file and how many of them are named.

    The whole file is scanned with `COLUMN_REF_RE`; every match counts as a
    column reference, and a match whose optional `{name}` group is present
    counts as a named reference.

    Args:
        filename: Path of the file to scan.

    Returns:
        A `(named_refs, refs)` tuple: the number of column references that
        carry a name and the total number of column references.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale's default
    # encoding: the scanned files can contain non-ASCII characters.
    with open(filename, encoding="utf-8") as f:
        content = f.read()

    named_refs = 0
    refs = 0
    for match in COLUMN_REF_RE.finditer(content):
        refs += 1
        # Group 1 is the optional `{name}` suffix of the reference.
        if match.group(1):
            named_refs += 1
    return (named_refs, refs)


def main() -> None:
    """Print the namescore of each file followed by an overall summary.

    Positional command-line arguments name the files to score; when none
    are given, every file returned by `find_slt_files()` is scored. Files
    that contain no column references are not printed individually and are
    only counted in the summary line.
    """
    parser = argparse.ArgumentParser(
        prog="namescore",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""
calculates the `namescore` (percentage of column references with names)
of given files (or all SLT files in $MZ_ROOT/test/sqllogictest by default)""",
    )

    parser.add_argument(
        "tests",
        nargs="*",
        help="explicit files to run on [default: all SLT files in $MZ_ROOT/test/sqllogictest]",
    )
    args = parser.parse_args()

    tests = args.tests or find_slt_files()

    named_refs = 0
    refs = 0
    nonames = 0
    total = len(tests)
    for test in tests:
        nr, r = namescore(test)
        if r == 0:
            # No references at all, so there cannot be named ones; skip the
            # per-file line (it would divide by zero) and just count the file.
            assert nr == 0
            nonames += 1
            continue

        # Show paths relative to SLT_ROOT where possible to keep lines short.
        print(
            f"{test.removeprefix(str(SLT_ROOT) + os.sep)}: {nr / r * 100:.2f}% ({nr} / {r})"
        )
        named_refs += nr
        refs += r

    # With no column references anywhere (or no files at all) a percentage is
    # undefined; report "n/a" instead of raising ZeroDivisionError.
    overall = f"{named_refs / refs * 100:.2f}%" if refs else "n/a"
    print(
        f"\nOverall namescore: {overall} ({named_refs} / {refs}); {nonames} files with no column references / {total} total files"
    )


if __name__ == "__main__":
main()
22 changes: 11 additions & 11 deletions test/sqllogictest/aggregates.slt
Original file line number Diff line number Diff line change
Expand Up @@ -225,11 +225,11 @@ SELECT a, sum(b) from agg_pk group by a order by a
3 4

query T multiline
EXPLAIN OPTIMIZED PLAN WITH(arity, join implementations) AS VERBOSE TEXT FOR SELECT a, sum(b) from agg_pk group by a
EXPLAIN OPTIMIZED PLAN WITH(humanized expressions, arity, join implementations) AS VERBOSE TEXT FOR SELECT a, sum(b) from agg_pk group by a
----
Explained Query:
Project (#0, #3) // { arity: 2 }
Map (integer_to_bigint(#1)) // { arity: 4 }
Project (#0{a}, #3) // { arity: 2 }
Map (integer_to_bigint(#1{b})) // { arity: 4 }
ReadStorage materialize.public.agg_pk // { arity: 3 }

Source materialize.public.agg_pk
Expand All @@ -246,11 +246,11 @@ SELECT a, sum(c) from agg_pk group by a order by a
3 5

query T multiline
EXPLAIN OPTIMIZED PLAN WITH(arity, join implementations) AS VERBOSE TEXT FOR SELECT a, sum(c) from agg_pk group by a
EXPLAIN OPTIMIZED PLAN WITH(humanized expressions, arity, join implementations) AS VERBOSE TEXT FOR SELECT a, sum(c) from agg_pk group by a
----
Explained Query:
Project (#0, #3) // { arity: 2 }
Map (bigint_to_numeric(#2)) // { arity: 4 }
Project (#0{a}, #3) // { arity: 2 }
Map (bigint_to_numeric(#2{c})) // { arity: 4 }
ReadStorage materialize.public.agg_pk // { arity: 3 }

Source materialize.public.agg_pk
Expand Down Expand Up @@ -719,18 +719,18 @@ EOF

# Should include only one sum(*) aggregate.
query T multiline
EXPLAIN OPTIMIZED PLAN AS VERBOSE TEXT FOR SELECT stddev(x), sum(x) FROM t_variance;
EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT stddev(x), sum(x) FROM t_variance;
----
Explained Query:
With
cte l0 =
Reduce aggregates=[sum((#0 * #0)), sum(#0), count(#0)]
Reduce aggregates=[sum((#0{x} * #0{x})), sum(#0{x}), count(#0{x})]
ReadStorage materialize.public.t_variance
Return
Project (#4, #3)
Map (sqrtf64(case when ((#0) IS NULL OR (#1) IS NULL OR (case when (#2 = 0) then null else #2 end) IS NULL OR (case when (0 = (#2 - 1)) then null else (#2 - 1) end) IS NULL) then null else greatest(((#0 - ((#1 * #1) / bigint_to_double(case when (#2 = 0) then null else #2 end))) / bigint_to_double(case when (0 = (#2 - 1)) then null else (#2 - 1) end)), 0) end))
Project (#4, #3{sum_x})
Map (sqrtf64(case when ((#0{sum}) IS NULL OR (#1{sum_x}) IS NULL OR (case when (#2{count_x} = 0) then null else #2{count_x} end) IS NULL OR (case when (0 = (#2{count_x} - 1)) then null else (#2{count_x} - 1) end) IS NULL) then null else greatest(((#0{sum} - ((#1{sum_x} * #1{sum_x}) / bigint_to_double(case when (#2{count_x} = 0) then null else #2{count_x} end))) / bigint_to_double(case when (0 = (#2{count_x} - 1)) then null else (#2{count_x} - 1) end)), 0) end))
Union
Project (#0..=#2, #1)
Project (#0{sum}..=#2{count_x}, #1{sum_x})
Get l0
Map (null, null, 0, null)
Union
Expand Down
26 changes: 13 additions & 13 deletions test/sqllogictest/alter-table.slt
Original file line number Diff line number Diff line change
Expand Up @@ -293,18 +293,18 @@ SELECT v2.a, v3.a, v4.a, v2.b, v3.b, v4.c FROM v2 JOIN v3 ON v2.a = v3.a JOIN v4
7 7 7 bar bar 1000-01-01␠00:00:00

query T multiline
EXPLAIN OPTIMIZED PLAN AS VERBOSE TEXT FOR SELECT v2.a, v3.a, v4.a, v2.b, v3.b, v4.c FROM v2 JOIN v3 ON v2.a = v3.a JOIN v4 ON v2.a = v4.a;
EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT v2.a, v3.a, v4.a, v2.b, v3.b, v4.c FROM v2 JOIN v3 ON v2.a = v3.a JOIN v4 ON v2.a = v4.a;
----
Explained Query:
Project (#0, #0, #0, #1, #3, #5)
Join on=(#0 = #2 = #4) type=delta
ArrangeBy keys=[[#0]]
Project (#0{a}, #0{a}, #0{a}, #1{b}, #3{b}, #5{c})
Join on=(#0{a} = #2{a} = #4{a}) type=delta
ArrangeBy keys=[[#0{a}]]
ReadIndex on=v2 v2_idx=[delta join 1st input (full scan)]
ArrangeBy keys=[[#0]]
Project (#0, #1)
ArrangeBy keys=[[#0{a}]]
Project (#0{a}, #1{b})
ReadIndex on=v3 v3_idx_full_scan=[*** full scan ***]
ArrangeBy keys=[[#0]]
Project (#0, #2)
ArrangeBy keys=[[#0{a}]]
Project (#0{a}, #2{c})
ReadStorage materialize.public.t1

Source materialize.public.t1
Expand Down Expand Up @@ -334,17 +334,17 @@ statement ok
CREATE INDEX t1_idx_a ON t1 (a);

query T multiline
EXPLAIN OPTIMIZED PLAN AS VERBOSE TEXT FOR SELECT v2.a, v3.a, v4.a, v2.b, v3.b, v4.c FROM v2 JOIN v3 ON v2.a = v3.a JOIN v4 ON v2.a = v4.a;
EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT v2.a, v3.a, v4.a, v2.b, v3.b, v4.c FROM v2 JOIN v3 ON v2.a = v3.a JOIN v4 ON v2.a = v4.a;
----
Explained Query:
With
cte l0 =
ArrangeBy keys=[[#0]]
ArrangeBy keys=[[#0{a}]]
ReadIndex on=t1 t1_idx_a=[delta join lookup]
Return
Project (#0, #0, #0, #1, #3, #7)
Join on=(#0 = #2 = #5) type=delta
ArrangeBy keys=[[#0]]
Project (#0{a}, #0{a}, #0{a}, #1{b}, #3{b}, #7{c})
Join on=(#0{a} = #2{a} = #5{a}) type=delta
ArrangeBy keys=[[#0{a}]]
ReadIndex on=v2 v2_idx=[delta join 1st input (full scan)]
Get l0
Get l0
Expand Down
20 changes: 10 additions & 10 deletions test/sqllogictest/arithmetic.slt
Original file line number Diff line number Diff line change
Expand Up @@ -951,14 +951,14 @@ SELECT
1 1 5

query T multiline
EXPLAIN OPTIMIZED PLAN WITH(arity, join implementations) AS VERBOSE TEXT FOR SELECT
EXPLAIN OPTIMIZED PLAN WITH(humanized expressions, arity, join implementations) AS VERBOSE TEXT FOR SELECT
x1 >> x2 << x3 as r1,
y1 << y2 >> y3 as r2
FROM nums
----
Explained Query:
Project (#9, #10) // { arity: 2 }
Map (((#0 >> smallint_to_integer(#1)) << smallint_to_integer(#2)), ((#3 << #4) >> #5)) // { arity: 11 }
Map (((#0{x1} >> smallint_to_integer(#1{x2})) << smallint_to_integer(#2{x3})), ((#3{y1} << #4{y2}) >> #5{y3})) // { arity: 11 }
ReadStorage materialize.public.nums // { arity: 9 }

Source materialize.public.nums
Expand Down Expand Up @@ -986,14 +986,14 @@ SELECT
8 8 5

query T multiline
EXPLAIN OPTIMIZED PLAN WITH(arity, join implementations) AS VERBOSE TEXT FOR SELECT
EXPLAIN OPTIMIZED PLAN WITH(humanized expressions, arity, join implementations) AS VERBOSE TEXT FOR SELECT
x1 >> x2 & x3 as r1,
y1 << y2 & y3 as r2
FROM nums
----
Explained Query:
Project (#9, #10) // { arity: 2 }
Map (((#0 >> smallint_to_integer(#1)) & #2), ((#3 << #4) & #5)) // { arity: 11 }
Map (((#0{x1} >> smallint_to_integer(#1{x2})) & #2{x3}), ((#3{y1} << #4{y2}) & #5{y3})) // { arity: 11 }
ReadStorage materialize.public.nums // { arity: 9 }

Source materialize.public.nums
Expand Down Expand Up @@ -1021,7 +1021,7 @@ SELECT
0 0 1

query T multiline
EXPLAIN OPTIMIZED PLAN WITH(arity, join implementations) AS VERBOSE TEXT FOR SELECT
EXPLAIN OPTIMIZED PLAN WITH(humanized expressions, arity, join implementations) AS VERBOSE TEXT FOR SELECT
x1 & x2 | x3 as r1,
y1 & y2 | y3 as r2,
z1 & z2 | z3 as r3,
Expand All @@ -1030,7 +1030,7 @@ FROM nums
----
Explained Query:
Project (#9..=#12) // { arity: 4 }
Map (((#0 & #1) | #2), ((#3 & #4) | #5), ((#6 & #7) | #8), (integer_to_bigint((smallint_to_integer(#0) & #4)) | #8)) // { arity: 13 }
Map (((#0{x1} & #1{x2}) | #2{x3}), ((#3{y1} & #4{y2}) | #5{y3}), ((#6{z1} & #7{z2}) | #8{z3}), (integer_to_bigint((smallint_to_integer(#0{x1}) & #4{y2})) | #8{z3})) // { arity: 13 }
ReadStorage materialize.public.nums // { arity: 9 }

Source materialize.public.nums
Expand Down Expand Up @@ -1058,7 +1058,7 @@ SELECT
0 0 1

query T multiline
EXPLAIN OPTIMIZED PLAN WITH(arity, join implementations) AS VERBOSE TEXT FOR SELECT
EXPLAIN OPTIMIZED PLAN WITH(humanized expressions, arity, join implementations) AS VERBOSE TEXT FOR SELECT
x1 # x2 & x3 as r1,
y1 # y2 & y3 as r2,
z1 # z2 & z3 as r3,
Expand All @@ -1067,7 +1067,7 @@ FROM nums
----
Explained Query:
Project (#9..=#12) // { arity: 4 }
Map (((#0 # #1) & #2), ((#3 # #4) & #5), ((#6 # #7) & #8), (integer_to_bigint((smallint_to_integer(#0) # #4)) & #8)) // { arity: 13 }
Map (((#0{x1} # #1{x2}) & #2{x3}), ((#3{y1} # #4{y2}) & #5{y3}), ((#6{z1} # #7{z2}) & #8{z3}), (integer_to_bigint((smallint_to_integer(#0{x1}) # #4{y2})) & #8{z3})) // { arity: 13 }
ReadStorage materialize.public.nums // { arity: 9 }

Source materialize.public.nums
Expand Down Expand Up @@ -1095,7 +1095,7 @@ SELECT
1 1 0

query T multiline
EXPLAIN OPTIMIZED PLAN WITH(arity, join implementations) AS VERBOSE TEXT FOR SELECT
EXPLAIN OPTIMIZED PLAN WITH(humanized expressions, arity, join implementations) AS VERBOSE TEXT FOR SELECT
x1 # x2 | x3 as r1,
y1 # y2 | y3 as r2,
z1 # z2 | z3 as r3,
Expand All @@ -1104,7 +1104,7 @@ FROM nums
----
Explained Query:
Project (#9..=#12) // { arity: 4 }
Map (((#0 # #1) | #2), ((#3 # #4) | #5), ((#6 # #7) | #8), (integer_to_bigint((smallint_to_integer(#0) # #4)) | #8)) // { arity: 13 }
Map (((#0{x1} # #1{x2}) | #2{x3}), ((#3{y1} # #4{y2}) | #5{y3}), ((#6{z1} # #7{z2}) | #8{z3}), (integer_to_bigint((smallint_to_integer(#0{x1}) # #4{y2})) | #8{z3})) // { arity: 13 }
ReadStorage materialize.public.nums // { arity: 9 }

Source materialize.public.nums
Expand Down
Loading