22
33from typing import Optional
44from dve .core_engine .backends .implementations .duckdb .duckdb_helpers import duckdb_get_entity_count , duckdb_write_parquet
5- from dve .core_engine .backends . utilities import dump_errors
5+ from dve .core_engine .exceptions import CriticalProcessingError
66from dve .core_engine .models import SubmissionInfo
77from dve .core_engine .type_hints import URI , Failed
88from dve .pipeline .duckdb_pipeline import DDBDVEPipeline
99from dve .pipeline .utils import SubmissionStatus
1010from dve .parser import file_handling as fh
11+ from dve .reporting .utils import dump_processing_errors
1112
13+ @duckdb_get_entity_count
14+ @duckdb_write_parquet
1215class FoundryDDBPipeline (DDBDVEPipeline ):
13- """DuckDB pipeline for running on Foundry Platform.
14- Polymorphed to allow for exception handling when processing
15- single files sequentially through services."""
16+ """DuckDB pipeline for running on Foundry Platform"""
1617
1718 def persist_audit_records (self , submission_info : SubmissionInfo ) -> URI :
1819 """Write out key audit relations to parquet for persisting to datasets"""
@@ -29,66 +30,106 @@ def persist_audit_records(self, submission_info: SubmissionInfo) -> URI:
2930
def file_transformation(
    self, submission_info: SubmissionInfo
) -> tuple[SubmissionInfo, SubmissionStatus]:
    """Run the parent file transformation, turning a crash into a failed status.

    Method of ``FoundryDDBPipeline``. Delegates to the parent implementation
    and returns its result unchanged when it succeeds.

    Args:
        submission_info: The submission being processed.

    Returns:
        The parent's ``(submission_info, status)`` pair, or the incoming
        ``submission_info`` with ``SubmissionStatus(processing_failed=True)``
        when the parent raised.
    """
    try:
        return super().file_transformation(submission_info)
    except Exception as exc:  # pylint: disable=W0718
        # Broad catch is deliberate: one bad submission must not abort the
        # sequential run, so every failure is recorded and reported instead.
        self._logger.error(f"File transformation raised exception: {exc}")
        self._logger.exception(exc)
        dump_processing_errors(
            fh.joinuri(self.processed_files_path, submission_info.submission_id),
            "file_transformation",
            [CriticalProcessingError.from_exception(exc)],
        )
        self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
        return submission_info, SubmissionStatus(processing_failed=True)
3946
def apply_data_contract(
    self,
    submission_info: SubmissionInfo,
    submission_status: Optional[SubmissionStatus] = None,
) -> tuple[SubmissionInfo, SubmissionStatus]:
    """Apply the data contract via the parent, turning a crash into a failed status.

    Method of ``FoundryDDBPipeline``.

    Args:
        submission_info: The submission being processed.
        submission_status: Status carried over from the previous stage, passed
            straight through to the parent implementation.

    Returns:
        The parent's ``(submission_info, status)`` pair, or the incoming
        ``submission_info`` with ``SubmissionStatus(processing_failed=True)``
        when the parent raised.
    """
    try:
        return super().apply_data_contract(submission_info, submission_status)
    except Exception as exc:  # pylint: disable=W0718
        # Broad catch is deliberate: the failure is logged, dumped and audited
        # here so the caller only has to inspect the returned status.
        self._logger.error(f"Apply data contract raised exception: {exc}")
        self._logger.exception(exc)
        dump_processing_errors(
            fh.joinuri(self.processed_files_path, submission_info.submission_id),
            "contract",
            [CriticalProcessingError.from_exception(exc)],
        )
        self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
        return submission_info, SubmissionStatus(processing_failed=True)
4760
def apply_business_rules(
    self,
    submission_info: SubmissionInfo,
    submission_status: Optional[SubmissionStatus] = None,
) -> tuple[SubmissionInfo, SubmissionStatus]:
    """Apply business rules via the parent, turning a crash into a failed status.

    Method of ``FoundryDDBPipeline``.

    Args:
        submission_info: The submission being processed.
        submission_status: Status carried over from the previous stage, passed
            straight through to the parent implementation.

    Returns:
        The parent's result (unpacked by ``run_pipeline`` as a
        ``(submission_info, status)`` pair), or the incoming
        ``submission_info`` with ``SubmissionStatus(processing_failed=True)``
        when the parent raised.
    """
    try:
        return super().apply_business_rules(submission_info, submission_status)
    except Exception as exc:  # pylint: disable=W0718
        # Broad catch is deliberate: the failure is logged, dumped and audited
        # here so the caller only has to inspect the returned status.
        self._logger.error(f"Apply business rules raised exception: {exc}")
        self._logger.exception(exc)
        dump_processing_errors(
            fh.joinuri(self.processed_files_path, submission_info.submission_id),
            "business_rules",
            [CriticalProcessingError.from_exception(exc)],
        )
        self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
        return submission_info, SubmissionStatus(processing_failed=True)
74+
def error_report(
    self,
    submission_info: SubmissionInfo,
    submission_status: Optional[SubmissionStatus] = None,
):
    """Build the error report via the parent, tolerating a crash in reporting.

    Method of ``FoundryDDBPipeline``.

    Args:
        submission_info: The submission being processed.
        submission_status: Status carried over from the previous stage, passed
            straight through to the parent implementation.

    Returns:
        The parent's result (unpacked by ``run_pipeline`` as
        ``(submission_info, status, sub_stats, report_uri)``). When the parent
        raised, ``sub_stats`` and ``report_uri`` are ``None`` and the status is
        the incoming one, or ``SubmissionStatus(processing_failed=True)`` if
        none was supplied.
    """
    try:
        return super().error_report(submission_info, submission_status)
    except Exception as exc:  # pylint: disable=W0718
        self._logger.error(f"Error reports raised exception: {exc}")
        self._logger.exception(exc)
        dump_processing_errors(
            fh.joinuri(self.processed_files_path, submission_info.submission_id),
            "error_report",
            [CriticalProcessingError.from_exception(exc)],
        )
        self._audit_tables.mark_failed(submissions=[submission_info.submission_id])
        if submission_status is None:
            # Callers dereference the returned status, so never hand back None.
            submission_status = SubmissionStatus(processing_failed=True)
        # NOTE(review): a caller-supplied status is returned as-is and so does
        # not itself record that reporting failed - confirm this is intended.
        return submission_info, submission_status, None, None
5590
def run_pipeline(
    self, submission_info: SubmissionInfo
) -> tuple[Optional[URI], Optional[URI], URI]:
    """Sequential single submission pipeline runner.

    Method of ``FoundryDDBPipeline``. Runs file transformation, data contract,
    business rules and error reporting in order for one submission, updating
    the audit tables between stages, and always persists the audit records.

    Args:
        submission_info: The submission to process.

    Returns:
        A 3-tuple of:
          * the ``business_rules`` URI under the processed-files path, or
            ``None`` when validation or processing failed;
          * the error report URI, or ``None`` when no report was produced;
          * the URI returned by ``persist_audit_records``.
    """
    # Bound before the try block so the handler and the final return can
    # never hit an unbound name when an early audit call raises.
    sub_id: str = submission_info.submission_id
    sub_info = submission_info
    sub_status: Optional[SubmissionStatus] = None
    report_uri: Optional[URI] = None
    try:
        self._audit_tables.add_new_submissions(submissions=[submission_info])
        self._audit_tables.mark_transform(submission_ids=[sub_id])
        sub_info, sub_status = self.file_transformation(submission_info=submission_info)
        if not (sub_status.validation_failed or sub_status.processing_failed):
            self._audit_tables.mark_data_contract(submission_ids=[sub_id])
            sub_info, sub_status = self.apply_data_contract(
                submission_info=sub_info, submission_status=sub_status
            )
            # NOTE(review): business rules still run when the data contract
            # stage reported processing_failed - confirm that is intended.
            self._audit_tables.mark_business_rules(
                submissions=[(sub_id, sub_status.validation_failed)]
            )
            sub_info, sub_status = self.apply_business_rules(
                submission_info=sub_info, submission_status=sub_status
            )

        if not sub_status.processing_failed:
            self._audit_tables.mark_error_report(
                submissions=[(sub_id, sub_status.submission_result)]
            )
            sub_info, sub_status, sub_stats, report_uri = self.error_report(
                submission_info=sub_info, submission_status=sub_status
            )
            # error_report yields no statistics when reporting itself failed;
            # there is nothing to record in that case.
            if sub_stats is not None:
                self._audit_tables.add_submission_statistics_records(sub_stats=[sub_stats])
    except Exception as err:  # pylint: disable=W0718
        # Whatever status was reached, an exception here means processing failed.
        sub_status = SubmissionStatus(processing_failed=True)
        self._logger.error(
            f"During processing of submission_id: {sub_id}, the following exception was raised: {err}"
        )
        dump_processing_errors(
            fh.joinuri(self.processed_files_path, sub_id),
            "run_pipeline",
            [CriticalProcessingError.from_exception(err)],
        )
        self._audit_tables.mark_failed(submissions=[sub_id])
    finally:
        audit_files_uri = self.persist_audit_records(submission_info=submission_info)

    # Returned after (not inside) the finally block so that an exception the
    # handler above does not cover is never silently discarded.
    unusable = (
        sub_status is None
        or sub_status.validation_failed
        or sub_status.processing_failed
    )
    return (
        None if unusable else fh.joinuri(self.processed_files_path, sub_id, "business_rules"),
        report_uri or None,
        audit_files_uri,
    )
0 commit comments