Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 46 additions & 28 deletions meta/src/meta/grammar.y
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,9 @@
%nonterm construct logic.Construct
%nonterm context transactions.Context
%nonterm csv_asof String
%nonterm csv_column logic.CSVColumn
%nonterm csv_columns Sequence[logic.CSVColumn]
%nonterm gnf_column logic.GNFColumn
%nonterm gnf_column_path Sequence[String]
%nonterm gnf_columns Sequence[logic.GNFColumn]
%nonterm csv_config logic.CSVConfig
%nonterm csv_data logic.CSVData
%nonterm csv_locator_inline_data String
Expand Down Expand Up @@ -137,16 +138,17 @@
%nonterm read transactions.Read
%nonterm reduce logic.Reduce
%nonterm rel_atom logic.RelAtom
%nonterm rel_edb logic.RelEDB
%nonterm rel_edb_path Sequence[String]
%nonterm rel_edb_types Sequence[logic.Type]
%nonterm edb logic.EDB
%nonterm edb_path Sequence[String]
%nonterm edb_types Sequence[logic.Type]
%nonterm rel_term logic.RelTerm
%nonterm relation_id logic.RelationId
%nonterm script logic.Script
%nonterm specialized_value logic.Value
%nonterm string_type logic.StringType
%nonterm sum_monoid logic.SumMonoid
%nonterm snapshot transactions.Snapshot
%nonterm snapshot_mapping transactions.SnapshotMapping
%nonterm sync transactions.Sync
%nonterm term logic.Term
%nonterm terms Sequence[logic.Term]
Expand Down Expand Up @@ -899,10 +901,10 @@ functional_dependency_values
: "(" "values" var* ")"

data
: rel_edb
construct: $$ = logic.Data(rel_edb=$1)
deconstruct if builtin.has_proto_field($$, 'rel_edb'):
$1: logic.RelEDB = $$.rel_edb
: edb
construct: $$ = logic.Data(edb=$1)
deconstruct if builtin.has_proto_field($$, 'edb'):
$1: logic.EDB = $$.edb
| betree_relation
construct: $$ = logic.Data(betree_relation=$1)
deconstruct if builtin.has_proto_field($$, 'betree_relation'):
Expand All @@ -912,15 +914,15 @@ data
deconstruct if builtin.has_proto_field($$, 'csv_data'):
$1: logic.CSVData = $$.csv_data

rel_edb_path
edb_path
: "[" STRING* "]"

rel_edb_types
edb_types
: "[" type* "]"

rel_edb
: "(" "rel_edb" relation_id rel_edb_path rel_edb_types ")"
construct: $$ = logic.RelEDB(target_id=$3, path=$4, types=$5)
edb
: "(" "edb" relation_id edb_path edb_types ")"
construct: $$ = logic.EDB(target_id=$3, path=$4, types=$5)
deconstruct:
$3: logic.RelationId = $$.target_id
$4: Sequence[String] = $$.path
Expand All @@ -947,19 +949,19 @@ betree_info_key_types
betree_info_value_types
: "(" "value_types" type* ")"

csv_columns
: "(" "columns" csv_column* ")"
gnf_columns
: "(" "columns" gnf_column* ")"

csv_asof
: "(" "asof" STRING ")"

csv_data
: "(" "csv_data" csvlocator csv_config csv_columns csv_asof ")"
: "(" "csv_data" csvlocator csv_config gnf_columns csv_asof ")"
construct: $$ = logic.CSVData(locator=$3, config=$4, columns=$5, asof=$6)
deconstruct:
$3: logic.CSVLocator = $$.locator
$4: logic.CSVConfig = $$.config
$5: Sequence[logic.CSVColumn] = $$.columns
$5: Sequence[logic.GNFColumn] = $$.columns
$6: String = $$.asof

csv_locator_paths
Expand All @@ -980,12 +982,22 @@ csv_config
construct: $$ = construct_csv_config($3)
deconstruct: $3: Sequence[Tuple[String, logic.Value]] = deconstruct_csv_config($$)

csv_column
: "(" "column" STRING relation_id "[" type* "]" ")"
construct: $$ = logic.CSVColumn(column_name=$3, target_id=$4, types=$6)
gnf_column_path
: STRING
construct: $$ = [$1]
deconstruct if builtin.length($$) == 1:
$1: String = $$[0]
| "[" STRING* "]"
construct: $$ = $2
deconstruct if builtin.length($$) != 1:
$2: Sequence[String] = $$
Copy link
Contributor

@minsungc minsungc Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there any semantics for the case where gnf_column_path is a sequence of length 1? Is that something we can explicitly disallow?


gnf_column
: "(" "column" gnf_column_path relation_id? "[" type* "]" ")"
construct: $$ = logic.GNFColumn(column_path=$3, target_id=$4, types=$6)
deconstruct:
$3: String = $$.column_name
$4: logic.RelationId = $$.target_id
$3: Sequence[String] = $$.column_path
$4: Optional[logic.RelationId] = $$.target_id if builtin.has_proto_field($$, "target_id") else None
$6: Sequence[logic.Type] = $$.types

undefine
Expand All @@ -998,12 +1010,17 @@ context
construct: $$ = transactions.Context(relations=$3)
deconstruct: $3: Sequence[logic.RelationId] = $$.relations

snapshot
: "(" "snapshot" rel_edb_path relation_id ")"
construct: $$ = transactions.Snapshot(destination_path=$3, source_relation=$4)
snapshot_mapping
: edb_path relation_id
construct: $$ = transactions.SnapshotMapping(destination_path=$1, source_relation=$2)
deconstruct:
$3: Sequence[String] = $$.destination_path
$4: logic.RelationId = $$.source_relation
$1: Sequence[String] = $$.destination_path
$2: logic.RelationId = $$.source_relation

snapshot
: "(" "snapshot" snapshot_mapping* ")"
construct: $$ = transactions.Snapshot(mappings=$3)
deconstruct: $3: Sequence[transactions.SnapshotMapping] = $$.mappings

epoch_reads
: "(" "reads" read* ")"
Expand Down Expand Up @@ -1385,6 +1402,7 @@ def deconstruct_export_csv_config(msg: transactions.ExportCSVConfig) -> List[Tup
return builtin.list_sort(result)



Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change

def deconstruct_relation_id_string(msg: logic.RelationId) -> String:
name: Optional[String] = builtin.relation_id_to_string(msg)
return builtin.unwrap_option(name)
Expand Down
16 changes: 8 additions & 8 deletions meta/src/meta/yacc_action_parser.py
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Claude says: the grammar's gnf_column_path rule indexes into a Sequence[String] with $$[0], which requires the sequence/list indexing support added in this file (yacc_action_parser.py).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, GetElement was just for tuple types. We never had a need for indexing into lists, until now.

Original file line number Diff line number Diff line change
Expand Up @@ -169,15 +169,15 @@ def _infer_type(
elif isinstance(expr, ListExpr):
return ListType(expr.element_type)
elif isinstance(expr, GetElement):
# Infer tuple element type
tuple_type = _infer_type(expr.tuple_expr, line, ctx)
if isinstance(tuple_type, TupleType) and 0 <= expr.index < len(
tuple_type.elements
# Infer element type from tuple or sequence/list indexing
container_type = _infer_type(expr.tuple_expr, line, ctx)
if isinstance(container_type, TupleType) and 0 <= expr.index < len(
container_type.elements
):
return tuple_type.elements[expr.index]
raise YaccGrammarError(
f"Cannot infer type of tuple element access: {expr}", line
)
return container_type.elements[expr.index]
if isinstance(container_type, (SequenceType, ListType)):
return container_type.element_type
raise YaccGrammarError(f"Cannot infer type of element access: {expr}", line)
elif isinstance(expr, GetField):
# GetField has field_type from proto schema lookup (or Unknown if not found)
return expr.field_type
Expand Down
14 changes: 7 additions & 7 deletions proto/relationalai/lqp/v1/logic.proto
Original file line number Diff line number Diff line change
Expand Up @@ -232,14 +232,14 @@ message Attribute {
//
message Data {
oneof data_type {
RelEDB rel_edb = 1;
EDB edb = 1;
BeTreeRelation betree_relation = 2;
CSVData csv_data = 3;
// IcebergRelation iceberg_relation = 4;
// IcebergData iceberg_data = 4;
}
}

message RelEDB {
message EDB {
RelationId target_id = 1;
repeated string path = 2;
repeated Type types = 3;
Expand Down Expand Up @@ -277,7 +277,7 @@ message BeTreeLocator {
message CSVData {
CSVLocator locator = 1;
CSVConfig config = 2;
repeated CSVColumn columns = 3;
repeated GNFColumn columns = 3;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if at some point we can unify or better distinguish between columns used for data ingestion vs data export? We currently also have ExportCSVColumn which is actually pretty similar to GNFColumn, just without the types field.

string asof = 4; // Blob storage timestamp for freshness requirements
}

Expand Down Expand Up @@ -311,9 +311,9 @@ message CSVConfig {
string compression = 11; // "none", "gzip", "zstd", "auto" (default: "auto")
}

message CSVColumn {
string column_name = 1; // Name in CSV file
RelationId target_id = 2; // Target relation
message GNFColumn {
repeated string column_path = 1; // Column identifier path (was: string column_name)
optional RelationId target_id = 2; // Target relation (now explicit optional)
repeated Type types = 3; // Relation signature (key types + value types)
}

Expand Down
11 changes: 8 additions & 3 deletions proto/relationalai/lqp/v1/transactions.proto
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,18 @@ message Context {
repeated RelationId relations = 1;
}

// Demand the source IDB, take an immutable snapshot, and turn it into an EDB under the
// given path (specified as a sequence of strings, see RelEDB).
message Snapshot {
// A single (destination, source) pair within a Snapshot action.
message SnapshotMapping {
repeated string destination_path = 1;
RelationId source_relation = 2;
}

// Demand the source IDBs, take immutable snapshots, and turn them into EDBs under the
// given paths (specified as sequences of strings, see EDB).
message Snapshot {
repeated SnapshotMapping mappings = 1;
}

//
// Export config
//
Expand Down
10 changes: 6 additions & 4 deletions sdks/go/src/lqp/v1/fragments.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading