@@ -362,7 +362,7 @@ def file_transformation_step(
362362 starmap (
363363 lambda x , _ : (
364364 x .submission_id ,
365- "failed " ,
365+ "validation_failed " ,
366366 ),
367367 failed ,
368368 )
@@ -377,9 +377,10 @@ def file_transformation_step(
377377
378378 return success , failed
379379
380- def apply_data_contract (self , submission_info : SubmissionInfo , submission_status : SubmissionStatus ) -> tuple [SubmissionInfo , SubmissionStatus ]:
380+ def apply_data_contract (self , submission_info : SubmissionInfo , submission_status : Optional [ SubmissionStatus ] = None ) -> tuple [SubmissionInfo , SubmissionStatus ]:
381381 """Method for applying the data contract given a submission_info"""
382-
382+ if not submission_status :
383+ submission_status = self ._audit_tables .get_submission_status (submission_info .submission_id )
383384 if not self .processed_files_path :
384385 raise AttributeError ("processed files path not provided" )
385386
@@ -422,14 +423,17 @@ def apply_data_contract(self, submission_info: SubmissionInfo, submission_status
422423 return submission_info , submission_status
423424
424425 def data_contract_step (
425- self , pool : Executor , file_transform_results : list [tuple [SubmissionInfo , SubmissionStatus ]]
426+ self , pool : Executor , file_transform_results : list [tuple [SubmissionInfo , Optional [ SubmissionStatus ] ]]
426427 ) -> tuple [list [tuple [SubmissionInfo , SubmissionStatus ]], list [tuple [SubmissionInfo , SubmissionStatus ]]]:
427428 """Step to validate the types of an untyped (stringly typed) parquet file"""
428429 processed_files : list [tuple [SubmissionInfo , SubmissionStatus ]] = []
429430 failed_processing : list [tuple [SubmissionInfo , SubmissionStatus ]] = []
430431 dc_futures : list [tuple [SubmissionInfo , SubmissionStatus , Future ]] = []
431432
432433 for info , sub_status in file_transform_results :
434+ sub_status = (
435+ sub_status if sub_status
436+ else self ._audit_tables .get_submission_status (info .submission_id ))
433437 dc_futures .append ((info , sub_status , pool .submit (self .apply_data_contract , info , sub_status )))
434438
435439 for sub_info , sub_status , future in dc_futures :
@@ -476,10 +480,13 @@ def data_contract_step(
476480
477481 return processed_files , failed_processing
478482
479- def apply_business_rules (self , submission_info : SubmissionInfo , submission_status : SubmissionStatus ):
483+ def apply_business_rules (self , submission_info : SubmissionInfo , submission_status : Optional [ SubmissionStatus ] = None ):
480484 """Apply the business rules to a given submission, the submission may have failed at the
481485 data_contract step so this should be passed in as a bool
482486 """
487+ if not submission_status :
488+ submission_status = self ._audit_tables .get_submission_status (submission_info .submission_id )
489+
483490 if not self .rules_path :
484491 raise AttributeError ("business rules path not provided." )
485492
@@ -557,7 +564,7 @@ def apply_business_rules(self, submission_info: SubmissionInfo, submission_statu
557564 def business_rule_step (
558565 self ,
559566 pool : Executor ,
560- files : list [tuple [SubmissionInfo , SubmissionStatus ]],
567+ files : list [tuple [SubmissionInfo , Optional [ SubmissionStatus ] ]],
561568 ) -> tuple [
562569 list [tuple [SubmissionInfo , SubmissionStatus ]],
563570 list [tuple [SubmissionInfo , SubmissionStatus ]],
@@ -567,6 +574,9 @@ def business_rule_step(
567574 future_files : list [tuple [SubmissionInfo , SubmissionStatus , Future ]] = []
568575
569576 for submission_info , submission_status in files :
577+ submission_status = (
578+ submission_status if submission_status
579+ else self ._audit_tables .get_submission_status (submission_info .submission_id ))
570580 future_files .append (
571581 (
572582 submission_info ,
@@ -677,8 +687,14 @@ def _get_error_dataframes(self, submission_id: str):
677687
678688 return errors_df , aggregates
679689
680- def error_report (self , submission_info : SubmissionInfo , submission_status : SubmissionStatus ) -> tuple [SubmissionInfo , SubmissionStatus , Optional [SubmissionStatisticsRecord ], Optional [URI ]]:
690+ def error_report (self ,
691+ submission_info : SubmissionInfo ,
692+ submission_status : Optional [SubmissionStatus ] = None ) -> tuple [SubmissionInfo , SubmissionStatus , Optional [SubmissionStatisticsRecord ], Optional [URI ]]:
681693 """Creates the error reports given a submission info and submission status"""
694+
695+ if not submission_status :
696+ submission_status = self ._audit_tables .get_submission_status (submission_info .submission_id )
697+
682698 if not self .processed_files_path :
683699 raise AttributeError ("processed files path not provided" )
684700
@@ -728,7 +744,7 @@ def error_report(self, submission_info: SubmissionInfo, submission_status: Submi
728744 def error_report_step (
729745 self ,
730746 pool : Executor ,
731- processed : Iterable [tuple [SubmissionInfo , SubmissionStatus ]] = tuple (),
747+ processed : Iterable [tuple [SubmissionInfo , Optional [ SubmissionStatus ] ]] = tuple (),
732748 failed_file_transformation : Iterable [tuple [SubmissionInfo , SubmissionStatus ]] = tuple (),
733749 ) -> list [
734750 tuple [SubmissionInfo , SubmissionStatus , Union [None , SubmissionStatisticsRecord ], URI ]
@@ -743,6 +759,10 @@ def error_report_step(
743759 failed_processing : list [tuple [SubmissionInfo , SubmissionStatus ]] = []
744760
745761 for info , status in processed :
762+ status = (
763+ status if status
764+ else self ._audit_tables .get_submission_status (info .submission_id )
765+ )
746766 futures .append ((info , status , pool .submit (self .error_report , info , status )))
747767
748768 for info_dict , status in failed_file_transformation :
0 commit comments