-
Notifications
You must be signed in to change notification settings - Fork 0
Data declaration extensions #216
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
1e4dae5
74d4e49
96a7958
2de9f39
8c4b474
142e4a1
6e16b86
500175d
a93518a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -71,8 +71,9 @@ | |||
| %nonterm construct logic.Construct | ||||
| %nonterm context transactions.Context | ||||
| %nonterm csv_asof String | ||||
| %nonterm csv_column logic.CSVColumn | ||||
| %nonterm csv_columns Sequence[logic.CSVColumn] | ||||
| %nonterm gnf_column logic.GNFColumn | ||||
| %nonterm gnf_column_path Sequence[String] | ||||
| %nonterm gnf_columns Sequence[logic.GNFColumn] | ||||
| %nonterm csv_config logic.CSVConfig | ||||
| %nonterm csv_data logic.CSVData | ||||
| %nonterm csv_locator_inline_data String | ||||
|
|
@@ -137,16 +138,17 @@ | |||
| %nonterm read transactions.Read | ||||
| %nonterm reduce logic.Reduce | ||||
| %nonterm rel_atom logic.RelAtom | ||||
| %nonterm rel_edb logic.RelEDB | ||||
| %nonterm rel_edb_path Sequence[String] | ||||
| %nonterm rel_edb_types Sequence[logic.Type] | ||||
| %nonterm edb logic.EDB | ||||
| %nonterm edb_path Sequence[String] | ||||
| %nonterm edb_types Sequence[logic.Type] | ||||
| %nonterm rel_term logic.RelTerm | ||||
| %nonterm relation_id logic.RelationId | ||||
| %nonterm script logic.Script | ||||
| %nonterm specialized_value logic.Value | ||||
| %nonterm string_type logic.StringType | ||||
| %nonterm sum_monoid logic.SumMonoid | ||||
| %nonterm snapshot transactions.Snapshot | ||||
| %nonterm snapshot_mapping transactions.SnapshotMapping | ||||
| %nonterm sync transactions.Sync | ||||
| %nonterm term logic.Term | ||||
| %nonterm terms Sequence[logic.Term] | ||||
|
|
@@ -899,10 +901,10 @@ functional_dependency_values | |||
| : "(" "values" var* ")" | ||||
|
|
||||
| data | ||||
| : rel_edb | ||||
| construct: $$ = logic.Data(rel_edb=$1) | ||||
| deconstruct if builtin.has_proto_field($$, 'rel_edb'): | ||||
| $1: logic.RelEDB = $$.rel_edb | ||||
| : edb | ||||
| construct: $$ = logic.Data(edb=$1) | ||||
| deconstruct if builtin.has_proto_field($$, 'edb'): | ||||
| $1: logic.EDB = $$.edb | ||||
| | betree_relation | ||||
| construct: $$ = logic.Data(betree_relation=$1) | ||||
| deconstruct if builtin.has_proto_field($$, 'betree_relation'): | ||||
|
|
@@ -912,15 +914,15 @@ data | |||
| deconstruct if builtin.has_proto_field($$, 'csv_data'): | ||||
| $1: logic.CSVData = $$.csv_data | ||||
|
|
||||
| rel_edb_path | ||||
| edb_path | ||||
| : "[" STRING* "]" | ||||
|
|
||||
| rel_edb_types | ||||
| edb_types | ||||
| : "[" type* "]" | ||||
|
|
||||
| rel_edb | ||||
| : "(" "rel_edb" relation_id rel_edb_path rel_edb_types ")" | ||||
| construct: $$ = logic.RelEDB(target_id=$3, path=$4, types=$5) | ||||
| edb | ||||
| : "(" "edb" relation_id edb_path edb_types ")" | ||||
| construct: $$ = logic.EDB(target_id=$3, path=$4, types=$5) | ||||
| deconstruct: | ||||
| $3: logic.RelationId = $$.target_id | ||||
| $4: Sequence[String] = $$.path | ||||
|
|
@@ -947,19 +949,19 @@ betree_info_key_types | |||
| betree_info_value_types | ||||
| : "(" "value_types" type* ")" | ||||
|
|
||||
| csv_columns | ||||
| : "(" "columns" csv_column* ")" | ||||
| gnf_columns | ||||
| : "(" "columns" gnf_column* ")" | ||||
|
|
||||
| csv_asof | ||||
| : "(" "asof" STRING ")" | ||||
|
|
||||
| csv_data | ||||
| : "(" "csv_data" csvlocator csv_config csv_columns csv_asof ")" | ||||
| : "(" "csv_data" csvlocator csv_config gnf_columns csv_asof ")" | ||||
| construct: $$ = logic.CSVData(locator=$3, config=$4, columns=$5, asof=$6) | ||||
| deconstruct: | ||||
| $3: logic.CSVLocator = $$.locator | ||||
| $4: logic.CSVConfig = $$.config | ||||
| $5: Sequence[logic.CSVColumn] = $$.columns | ||||
| $5: Sequence[logic.GNFColumn] = $$.columns | ||||
| $6: String = $$.asof | ||||
|
|
||||
| csv_locator_paths | ||||
|
|
@@ -980,12 +982,22 @@ csv_config | |||
| construct: $$ = construct_csv_config($3) | ||||
| deconstruct: $3: Sequence[Tuple[String, logic.Value]] = deconstruct_csv_config($$) | ||||
|
|
||||
| csv_column | ||||
| : "(" "column" STRING relation_id "[" type* "]" ")" | ||||
| construct: $$ = logic.CSVColumn(column_name=$3, target_id=$4, types=$6) | ||||
| gnf_column_path | ||||
| : STRING | ||||
| construct: $$ = [$1] | ||||
| deconstruct if builtin.length($$) == 1: | ||||
| $1: String = $$[0] | ||||
| | "[" STRING* "]" | ||||
| construct: $$ = $2 | ||||
| deconstruct if builtin.length($$) != 1: | ||||
| $2: Sequence[String] = $$ | ||||
|
|
||||
| gnf_column | ||||
| : "(" "column" gnf_column_path relation_id? "[" type* "]" ")" | ||||
| construct: $$ = logic.GNFColumn(column_path=$3, target_id=$4, types=$6) | ||||
| deconstruct: | ||||
| $3: String = $$.column_name | ||||
| $4: logic.RelationId = $$.target_id | ||||
| $3: Sequence[String] = $$.column_path | ||||
| $4: Optional[logic.RelationId] = $$.target_id if builtin.has_proto_field($$, "target_id") else None | ||||
| $6: Sequence[logic.Type] = $$.types | ||||
|
|
||||
| undefine | ||||
|
|
@@ -998,12 +1010,17 @@ context | |||
| construct: $$ = transactions.Context(relations=$3) | ||||
| deconstruct: $3: Sequence[logic.RelationId] = $$.relations | ||||
|
|
||||
| snapshot | ||||
| : "(" "snapshot" rel_edb_path relation_id ")" | ||||
| construct: $$ = transactions.Snapshot(destination_path=$3, source_relation=$4) | ||||
| snapshot_mapping | ||||
| : edb_path relation_id | ||||
| construct: $$ = transactions.SnapshotMapping(destination_path=$1, source_relation=$2) | ||||
| deconstruct: | ||||
| $3: Sequence[String] = $$.destination_path | ||||
| $4: logic.RelationId = $$.source_relation | ||||
| $1: Sequence[String] = $$.destination_path | ||||
| $2: logic.RelationId = $$.source_relation | ||||
|
|
||||
| snapshot | ||||
| : "(" "snapshot" snapshot_mapping* ")" | ||||
| construct: $$ = transactions.Snapshot(mappings=$3) | ||||
| deconstruct: $3: Sequence[transactions.SnapshotMapping] = $$.mappings | ||||
|
|
||||
| epoch_reads | ||||
| : "(" "reads" read* ")" | ||||
|
|
@@ -1385,6 +1402,7 @@ def deconstruct_export_csv_config(msg: transactions.ExportCSVConfig) -> List[Tup | |||
| return builtin.list_sort(result) | ||||
|
|
||||
|
|
||||
|
|
||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||
| def deconstruct_relation_id_string(msg: logic.RelationId) -> String: | ||||
| name: Optional[String] = builtin.relation_id_to_string(msg) | ||||
| return builtin.unwrap_option(name) | ||||
|
|
||||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Claude says: the grammar's `gnf_column_path` rule indexes into a `Sequence[String]` with `$$[0]`, which requires the sequence/list indexing support added in that file.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -232,14 +232,14 @@ message Attribute { | |
| // | ||
| message Data { | ||
| oneof data_type { | ||
| RelEDB rel_edb = 1; | ||
| EDB edb = 1; | ||
| BeTreeRelation betree_relation = 2; | ||
| CSVData csv_data = 3; | ||
| // IcebergRelation iceberg_relation = 4; | ||
| // IcebergData iceberg_data = 4; | ||
| } | ||
| } | ||
|
|
||
| message RelEDB { | ||
| message EDB { | ||
| RelationId target_id = 1; | ||
| repeated string path = 2; | ||
| repeated Type types = 3; | ||
|
|
@@ -277,7 +277,7 @@ message BeTreeLocator { | |
| message CSVData { | ||
| CSVLocator locator = 1; | ||
| CSVConfig config = 2; | ||
| repeated CSVColumn columns = 3; | ||
| repeated GNFColumn columns = 3; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if at some point we can unify or better distinguish between columns used for data ingestion vs data export? We currently also have |
||
| string asof = 4; // Blob storage timestamp for freshness requirements | ||
| } | ||
|
|
||
|
|
@@ -311,9 +311,9 @@ message CSVConfig { | |
| string compression = 11; // "none", "gzip", "zstd", "auto" (default: "auto") | ||
| } | ||
|
|
||
| message CSVColumn { | ||
| string column_name = 1; // Name in CSV file | ||
| RelationId target_id = 2; // Target relation | ||
| message GNFColumn { | ||
| repeated string column_path = 1; // Column identifier path (was: string column_name) | ||
| optional RelationId target_id = 2; // Target relation (now explicit optional) | ||
| repeated Type types = 3; // Relation signature (key types + value types) | ||
| } | ||
|
|
||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are there semantics for when
`gnf_column_path` is a sequence of length 1? Is it something we can explicitly disallow?