11"""A duckdb pipeline for running on Foundry platform"""
2- from typing import List , Optional , Tuple
3- from dve .core_engine .backends .implementations .duckdb .duckdb_helpers import duckdb_write_parquet
2+
3+ from typing import Optional
4+ from dve .core_engine .backends .implementations .duckdb .duckdb_helpers import duckdb_get_entity_count , duckdb_write_parquet
45from dve .core_engine .backends .utilities import dump_errors
56from dve .core_engine .models import SubmissionInfo
67from dve .core_engine .type_hints import URI , Failed
78from dve .pipeline .duckdb_pipeline import DDBDVEPipeline
89from dve .pipeline .utils import SubmissionStatus
910from dve .parser import file_handling as fh
1011
11- @duckdb_write_parquet
class FoundryDDBPipeline(DDBDVEPipeline):
    """DuckDB pipeline for running on Foundry Platform"""

    def persist_audit_records(self, submission_info: SubmissionInfo) -> URI:
        """Write out key audit relations to parquet for persisting to datasets.

        Args:
            submission_info: the submission whose audit output directory is used.

        Returns:
            The URI of the ``audit/`` directory the parquet files were written to.
        """
        write_to = fh.joinuri(self.processed_files_path, submission_info.submission_id, "audit/")
        # Table-driven: each audit relation is written next to the submission's
        # other outputs under a fixed file name.
        audit_outputs = {
            "processing_status.parquet": self._audit_tables._processing_status,
            "submission_statistics.parquet": self._audit_tables._submission_statistics,
        }
        for filename, audit_table in audit_outputs.items():
            self.write_parquet(audit_table.get_relation(), write_to + filename)
        return write_to
24-
25- def file_transformation (self , submission_info : SubmissionInfo ) -> SubmissionInfo | dict [str , str ]:
27+
28+ def file_transformation (
29+ self , submission_info : SubmissionInfo
30+ ) -> SubmissionInfo | dict [str , str ]:
2631 try :
2732 return super ().file_transformation (submission_info )
28- except Exception as exc :
33+ except Exception as exc : # pylint: disable=W0718
2934 self ._logger .error (f"File transformation raised exception: { exc } " )
3035 self ._logger .exception (exc )
31- # TODO: write errors to file here (maybe processing errors - not to be seen by end user)
3236 return submission_info .dict ()
33-
34- def apply_data_contract (self , submission_info : SubmissionInfo ) -> Tuple [SubmissionInfo | bool ]:
37+
38+ def apply_data_contract (self , submission_info : SubmissionInfo ) -> tuple [SubmissionInfo | bool ]:
3539 try :
3640 return super ().apply_data_contract (submission_info )
37- except Exception as exc :
41+ except Exception as exc : # pylint: disable=W0718
3842 self ._logger .error (f"Apply data contract raised exception: { exc } " )
3943 self ._logger .exception (exc )
40- # TODO: write errors to file here (maybe processing errors - not to be seen by end user)
4144 return submission_info , True
42-
45+
4346 def apply_business_rules (self , submission_info : SubmissionInfo , failed : Failed ):
4447 try :
4548 return super ().apply_business_rules (submission_info , failed )
46- except Exception as exc :
49+ except Exception as exc : # pylint: disable=W0718
4750 self ._logger .error (f"Apply business rules raised exception: { exc } " )
4851 self ._logger .exception (exc )
49- # TODO: write errors to file here (maybe processing errors - not to be seen by end user)
5052 return submission_info , SubmissionStatus (failed = True )
51-
52-
53- def run_pipeline (self , submission_info : SubmissionInfo ) -> Tuple [Optional [URI ], URI , URI ]:
53+
54+ def run_pipeline (self , submission_info : SubmissionInfo ) -> tuple [Optional [URI ], URI , URI ]:
5455 """Sequential single submission pipeline runner"""
5556 try :
5657 sub_id : str = submission_info .submission_id
@@ -61,23 +62,31 @@ def run_pipeline(self, submission_info: SubmissionInfo) -> Tuple[Optional[URI],
6162 self ._audit_tables .mark_data_contract (submission_ids = [sub_id ])
6263 sub_info , failed = self .apply_data_contract (submission_info = submission_info )
6364 self ._audit_tables .mark_business_rules (submissions = [(sub_id , failed )])
64- sub_info , sub_status = self .apply_business_rules (submission_info = submission_info , failed = failed )
65+ sub_info , sub_status = self .apply_business_rules (
66+ submission_info = submission_info , failed = failed
67+ )
6568 else :
66- sub_status = SubmissionStatus (failed = True )
67- self ._audit_tables .mark_error_report (submissions = [(sub_id , sub_status .submission_result )])
68- sub_info , sub_status , sub_stats , report_uri = self .error_report (submission_info = submission_info , status = sub_status )
69+ sub_status = SubmissionStatus (failed = True )
70+ self ._audit_tables .mark_error_report (
71+ submissions = [(sub_id , sub_status .submission_result )]
72+ )
73+ sub_info , sub_status , sub_stats , report_uri = self .error_report (
74+ submission_info = submission_info , status = sub_status
75+ )
6976 self ._audit_tables .add_submission_statistics_records (sub_stats = [sub_stats ])
70- except Exception as err :
71- self ._logger .error (f"During processing of submission_id: { sub_id } , the following exception was raised: { err } " )
77+ except Exception as err : # pylint: disable=W0718
78+ self ._logger .error (
79+ f"During processing of submission_id: { sub_id } , the following exception was raised: { err } "
80+ )
7281 self ._audit_tables .mark_failed (submissions = [sub_id ])
7382 finally :
7483 audit_files_uri = self .persist_audit_records (submission_info = submission_info )
75- return (
76- None if sub_status .failed else fh .joinuri (
77- self .processed_files_path ,
78- sub_id ,
79- "business_rules" ),
84+ return (
85+ (
86+ None
87+ if sub_status .failed
88+ else fh .joinuri (self .processed_files_path , sub_id , "business_rules" )
89+ ),
8090 report_uri ,
81- audit_files_uri
91+ audit_files_uri ,
8292 )
83-
0 commit comments