11"""OpenLineage event emitter for SQLMesh."""
22from __future__ import annotations
33
4+ import logging
45import typing as t
56from datetime import datetime , timezone
67
8+ logger = logging .getLogger (__name__ )
9+
710if t .TYPE_CHECKING :
811 from sqlmesh .core .snapshot import Snapshot
912 from sqlmesh .core .snapshot .definition import Interval
1013 from sqlmesh .core .snapshot .execution_tracker import QueryExecutionStats
1114
15+ PRODUCER = "https://github.com/sidequery/sqlmesh-openlineage"
16+
1217
1318class OpenLineageEmitter :
1419 """Emits OpenLineage events for SQLMesh operations."""
@@ -20,6 +25,7 @@ def __init__(
2025 api_key : t .Optional [str ] = None ,
2126 ):
2227 from openlineage .client import OpenLineageClient
28+ from openlineage .client .client import OpenLineageClientOptions
2329
2430 self .namespace = namespace
2531 self .url = url
@@ -32,15 +38,82 @@ def __init__(
3238 elif api_key :
3339 self .client = OpenLineageClient (
3440 url = url ,
35- options = { " api_key" : api_key } ,
41+ options = OpenLineageClientOptions ( api_key = api_key ) ,
3642 )
3743 else :
3844 self .client = OpenLineageClient (url = url )
3945
46+ def _build_job_facets (self , snapshot : "Snapshot" ) -> t .Dict [str , t .Any ]:
47+ """Build job facets including SQL, job type, and source code location."""
48+ from openlineage .client .facet_v2 import job_type_job , sql_job , source_code_location_job
49+
50+ facets : t .Dict [str , t .Any ] = {}
51+
52+ # JobTypeJobFacet: identify as SQLMesh batch job
53+ facets ["jobType" ] = job_type_job .JobTypeJobFacet (
54+ processingType = "BATCH" ,
55+ integration = "SQLMESH" ,
56+ jobType = "MODEL" ,
57+ )
58+
59+ # SQLJobFacet: include the model SQL query
60+ try :
61+ if snapshot .is_model and snapshot .model :
62+ query = snapshot .model .query
63+ if query is not None :
64+ sql_str = str (query )
65+ if sql_str :
66+ facets ["sql" ] = sql_job .SQLJobFacet (query = sql_str )
67+ except Exception :
68+ pass
69+
70+ # SourceCodeLocationJobFacet: include model file path
71+ try :
72+ if snapshot .is_model and snapshot .model :
73+ model_path = getattr (snapshot .model , "_path" , None )
74+ if model_path is not None :
75+ path_str = str (model_path )
76+ if path_str :
77+ facets ["sourceCodeLocation" ] = (
78+ source_code_location_job .SourceCodeLocationJobFacet (
79+ type = "file" ,
80+ url = f"file://{ path_str } " ,
81+ )
82+ )
83+ except Exception :
84+ pass
85+
86+ return facets
87+
88+ def _build_processing_engine_facet (self ) -> t .Dict [str , t .Any ]:
89+ """Build run facets for processing engine info."""
90+ from openlineage .client .facet_v2 import processing_engine_run
91+
92+ facets : t .Dict [str , t .Any ] = {}
93+
94+ try :
95+ from sqlmesh import __version__ as sqlmesh_version
96+ except ImportError :
97+ sqlmesh_version = "unknown"
98+
99+ try :
100+ from sqlmesh_openlineage import __version__ as adapter_version
101+ except ImportError :
102+ adapter_version = "unknown"
103+
104+ facets ["processing_engine" ] = processing_engine_run .ProcessingEngineRunFacet (
105+ version = sqlmesh_version ,
106+ name = "SQLMesh" ,
107+ openlineageAdapterVersion = adapter_version ,
108+ )
109+
110+ return facets
111+
40112 def emit_snapshot_start (
41113 self ,
42114 snapshot : "Snapshot" ,
43115 run_id : str ,
116+ snapshots : t .Optional [t .Dict [str , "Snapshot" ]] = None ,
44117 ) -> None :
45118 """Emit a START event for snapshot evaluation."""
46119 from openlineage .client .event_v2 import RunEvent , RunState , Run , Job
@@ -50,19 +123,25 @@ def emit_snapshot_start(
50123 snapshot_to_input_datasets ,
51124 )
52125
53- inputs = snapshot_to_input_datasets (snapshot , self .namespace )
126+ inputs = snapshot_to_input_datasets (snapshot , self .namespace , snapshots = snapshots )
54127 output = snapshot_to_output_dataset (snapshot , self .namespace )
55128
129+ job_facets = self ._build_job_facets (snapshot )
130+ run_facets = self ._build_processing_engine_facet ()
131+
56132 event = RunEvent (
57133 eventType = RunState .START ,
58134 eventTime = datetime .now (timezone .utc ).isoformat (),
59- run = Run (runId = run_id ),
60- job = Job (namespace = self .namespace , name = snapshot .name ),
135+ run = Run (runId = run_id , facets = run_facets ),
136+ job = Job (namespace = self .namespace , name = snapshot .name , facets = job_facets ),
61137 inputs = inputs ,
62138 outputs = [output ] if output else [],
63- producer = "sqlmesh-openlineage" ,
139+ producer = PRODUCER ,
64140 )
65- self .client .emit (event )
141+ try :
142+ self .client .emit (event )
143+ except Exception :
144+ logger .warning ("Failed to emit %s event for %s" , event .eventType , snapshot .name , exc_info = True )
66145
67146 def emit_snapshot_complete (
68147 self ,
@@ -71,33 +150,46 @@ def emit_snapshot_complete(
71150 interval : t .Optional ["Interval" ] = None ,
72151 duration_ms : t .Optional [int ] = None ,
73152 execution_stats : t .Optional ["QueryExecutionStats" ] = None ,
153+ snapshots : t .Optional [t .Dict [str , "Snapshot" ]] = None ,
74154 ) -> None :
75155 """Emit a COMPLETE event for snapshot evaluation."""
76156 from openlineage .client .event_v2 import RunEvent , RunState , Run , Job
77157
78- from sqlmesh_openlineage .datasets import snapshot_to_output_dataset
158+ from sqlmesh_openlineage .datasets import (
159+ snapshot_to_output_dataset ,
160+ snapshot_to_input_datasets ,
161+ )
79162 from sqlmesh_openlineage .facets import build_run_facets , build_output_facets
80163
81164 run_facets = build_run_facets (
82165 duration_ms = duration_ms ,
83166 execution_stats = execution_stats ,
84167 )
168+ run_facets .update (self ._build_processing_engine_facet ())
85169
86170 output = snapshot_to_output_dataset (
87171 snapshot ,
88172 self .namespace ,
89173 facets = build_output_facets (execution_stats ),
90174 )
91175
176+ inputs = snapshot_to_input_datasets (snapshot , self .namespace , snapshots = snapshots )
177+
178+ job_facets = self ._build_job_facets (snapshot )
179+
92180 event = RunEvent (
93181 eventType = RunState .COMPLETE ,
94182 eventTime = datetime .now (timezone .utc ).isoformat (),
95183 run = Run (runId = run_id , facets = run_facets ),
96- job = Job (namespace = self .namespace , name = snapshot .name ),
184+ job = Job (namespace = self .namespace , name = snapshot .name , facets = job_facets ),
185+ inputs = inputs ,
97186 outputs = [output ] if output else [],
98- producer = "sqlmesh-openlineage" ,
187+ producer = PRODUCER ,
99188 )
100- self .client .emit (event )
189+ try :
190+ self .client .emit (event )
191+ except Exception :
192+ logger .warning ("Failed to emit %s event for %s" , event .eventType , snapshot .name , exc_info = True )
101193
102194 def emit_snapshot_fail (
103195 self ,
@@ -124,6 +216,9 @@ def emit_snapshot_fail(
124216 },
125217 ),
126218 job = Job (namespace = self .namespace , name = snapshot .name ),
127- producer = "sqlmesh-openlineage" ,
219+ producer = PRODUCER ,
128220 )
129- self .client .emit (event )
221+ try :
222+ self .client .emit (event )
223+ except Exception :
224+ logger .warning ("Failed to emit %s event for %s" , event .eventType , snapshot .name , exc_info = True )
0 commit comments