1+ """A duckdb pipeline for running on Foundry platform"""
2+ from dve .core_engine .backends .implementations .duckdb .duckdb_helpers import duckdb_write_parquet
3+ from dve .core_engine .models import SubmissionInfo
4+ from dve .pipeline .duckdb_pipeline import DDBDVEPipeline
5+ from dve .pipeline .utils import SubmissionStatus
6+ from dve .parser import file_handling as fh
7+
@duckdb_write_parquet
class FoundryDDBPipeline(DDBDVEPipeline):
    """DuckDB pipeline for running on Foundry Platform"""

    def persist_audit_records(self, submission_info: SubmissionInfo) -> None:
        """Write out key audit relations to parquet for persisting to datasets.

        Two relations held by ``self._audit_tables`` are written: the
        processing status and the submission statistics. The target location
        is built by joining ``self.processed_files_path``, the submission id
        and ``"audit/"``.

        Args:
            submission_info: Submission whose ``submission_id`` is used to
                build the output location.
        """
        write_to = fh.joinuri(self.processed_files_path, submission_info.submission_id, "audit/")
        self.write_parquet(
            self._audit_tables._processing_status.get_relation(),
            write_to + "processing_status.parquet",
        )
        self.write_parquet(
            self._audit_tables._submission_statistics.get_relation(),
            write_to + "submission_statistics.parquet",
        )

    def run_pipeline(self, submission_info: SubmissionInfo) -> None:
        """Sequential single submission pipeline runner.

        Registers the submission in the audit tables, runs file
        transformation and, when that step hands back a ``SubmissionInfo``,
        the data contract and business rules steps. The error report step
        runs in either case and its statistics are recorded. Any exception
        raised along the way is logged and the submission is marked as failed
        rather than propagated. The audit records are persisted whatever the
        outcome.

        Args:
            submission_info: The single submission to process.
        """
        # Bound before the ``try`` so the ``except`` handler can always refer
        # to it; previously a failure on this line surfaced as an
        # UnboundLocalError raised from inside the handler.
        sub_id: str = submission_info.submission_id
        try:
            self._audit_tables.add_new_submissions(submissions=[submission_info])
            self._audit_tables.mark_transform(submission_ids=[sub_id])
            sub_info = self.file_transformation(submission_info=submission_info)
            if isinstance(sub_info, SubmissionInfo):
                self._audit_tables.mark_data_contract(submission_ids=[sub_id])
                _, failed = self.apply_data_contract(submission_info=submission_info)
                # Key the audit entry on the submission id, as every other
                # mark_* call in this method does (this used to pass the
                # SubmissionInfo object itself).
                self._audit_tables.mark_business_rules(submissions=[(sub_id, failed)])
                _, sub_status = self.apply_business_rules(
                    submission_info=submission_info, failed=failed
                )
            else:
                # file_transformation did not return a SubmissionInfo: skip
                # the validation steps and report the submission as failed.
                sub_status = SubmissionStatus(failed=True)
            self._audit_tables.mark_error_report(
                submissions=[(sub_id, sub_status.submission_result)]
            )
            _, _, sub_stats = self.error_report(submission_info=submission_info)
            self._audit_tables.add_submission_statistics_records(subs_stats=[sub_stats])
        except Exception as err:
            # Deliberate catch-all boundary: one bad submission is recorded as
            # failed in the audit tables instead of crashing the runner.
            self._logger.error(f"During processing of submission_id: { sub_id } , the following exception was raised: { err } ")
            self._audit_tables.mark_failed(submissions=[sub_id])
        finally:
            # Always write the audit relations out, on success or failure.
            self.persist_audit_records(submission_info=submission_info)
43+
0 commit comments