process_tracker_python-124 Add Extract Tracker finder

Alex Meadows · Alex Meadows · commit c30b2213397f · 2019-11-15T13:15:10.000-05:00
✨ Added ability to recreate ExtractTracker object based on extract_id. As with ProcessTracker, ExtractTracker can now re-instantiate objects that are in the middle of processing so that they can continue to be processed. Closes #124
diff --git a/process_tracker/extract_tracker.py b/process_tracker/extract_tracker.py
@@ -19,20 +19,22 @@
     ExtractProcess,
     ExtractStatus,
 )
+from process_tracker.models.source import DatasetType
 
 
 class ExtractTracker:
     def __init__(
         self,
         process_run,
-        filename,
+        filename=None,
         location=None,
         location_name=None,
         location_path=None,
         status=None,
         compression_type=None,
         filetype=None,
         config_location=None,
+        extract_id=None,
     ):
         """
         ExtractTracker is the primary engine for tracking data extracts
@@ -55,6 +57,9 @@ def __init__(
         :type filetype: string
         :param config_location: Optional location for the process_tracker configuration file.
         :type config_location: string
+        :param extract_id: If trying to work with a specific extract that's in process, provide the id and it will be
+        reconstructed.
+        :type extract_id: int
         """
         log_level = SettingsManager(
             config_location=config_location
@@ -68,117 +73,133 @@ def __init__(
         self.data_store = self.process_run.data_store
         self.session = self.process_run.session
 
-        self.filename = filename
-
-        if location is not None:
-            self.logger.info("Location object provided.")
-            self.location = location
-        elif location_path is not None:
-            self.logger.info("Location path provided.  Creating Location object.")
-            self.location = LocationTracker(
-                location_name=location_name,
-                location_path=location_path,
-                data_store=self.data_store,
+        # Getting all status types in the event there are custom status types added later.
+        self.extract_status_types = self.get_extract_status_types()
+
+        # For specific status types, need to retrieve their ids to be used for those status types' logic.
+
+        self.extract_status_initializing = self.extract_status_types["initializing"]
+        self.extract_status_ready = self.extract_status_types["ready"]
+        self.extract_status_loading = self.extract_status_types["loading"]
+        self.extract_status_loaded = self.extract_status_types["loaded"]
+        self.extract_status_archived = self.extract_status_types["archived"]
+        self.extract_status_deleted = self.extract_status_types["deleted"]
+        self.extract_status_error = self.extract_status_types["error"]
+
+        if extract_id is not None:
+            self.logger.info("Extract id provided.  Attempting to reconstruct.")
+
+            extract = self.data_store.get_or_create_item(
+                model=Extract, extract_id=extract_id, create=False
             )
-        else:
-            raise Exception("A location object or location_path must be provided.")
+            self.filename = extract.extract_filename
+            self.location = extract.locations
+            self.compression_type = extract.compression_type
+            if self.compression_type is None:
+                self.compression_type_id = None
+            else:
+                self.compression_type_id = self.compression_type.compression_type_id
+            self.filetype = extract.extract_filetype
+            self.extract = extract
+            self.full_filename = self.get_full_filename()
+            self.dataset_types = self.get_dataset_types()
+            self.extract_process = self.retrieve_extract_process()
 
-        if compression_type is not None:
-            self.logger.info("Finding compression type.")
-            try:
-                self.compression_type = self.data_store.get_or_create_item(
-                    model=ExtractCompressionType,
-                    create=False,
-                    extract_compression_type=compression_type,
-                )
-            except Exception:
-                error_msg = "%s is not a valid compression type." % compression_type
+        else:
+            if filename is None:
+                error_msg = "Filename must be provided."
                 self.logger.error(error_msg)
                 raise Exception(error_msg)
 
-            self.compression_type_id = self.compression_type.extract_compression_type_id
-        else:
-            self.compression_type_id = None
+            self.filename = filename
+
+            if location is not None:
+                self.logger.info("Location object provided.")
+                self.location = location
+            elif location_path is not None:
+                self.logger.info("Location path provided.  Creating Location object.")
+                self.location = LocationTracker(
+                    location_name=location_name,
+                    location_path=location_path,
+                    data_store=self.data_store,
+                )
+            else:
+                raise Exception("A location object or location_path must be provided.")
+
+            if compression_type is not None:
+                self.logger.info("Finding compression type.")
+                try:
+                    self.compression_type = self.data_store.get_or_create_item(
+                        model=ExtractCompressionType,
+                        create=False,
+                        extract_compression_type=compression_type,
+                    )
+                except Exception:
+                    error_msg = "%s is not a valid compression type." % compression_type
+                    self.logger.error(error_msg)
+                    raise Exception(error_msg)
 
-        if filetype is not None:
-            self.logger.info("File type provided.  Verifying it is a valid filetype.")
-            try:
-                self.filetype = self.data_store.get_or_create_item(
-                    model=ExtractFileType, create=False, extract_filetype=filetype
+                self.compression_type_id = (
+                    self.compression_type.extract_compression_type_id
                 )
-            except Exception:
-                error_msg = "%s is not a valid file type." % filetype
-                self.logger.error(error_msg)
-                raise Exception(error_msg)
-        else:
-            # Need to try to determine the filetype based on the extension of the filename.
-            file_extension = os.path.splitext(filename)[1]
-            file_extension = file_extension.replace(".", "")
-            self.logger.info(
-                "Trying to find record for file extension: %s" % file_extension
-            )
-            self.filetype = self.data_store.get_or_create_item(
-                model=ExtractFileType,
-                create=False,
-                extract_filetype_code=file_extension,
-            )
+            else:
+                self.compression_type_id = None
 
-        self.logger.info("Registering extract.")
+            if filetype is not None:
+                self.logger.info(
+                    "File type provided.  Verifying it is a valid filetype."
+                )
+                try:
+                    self.filetype = self.data_store.get_or_create_item(
+                        model=ExtractFileType, create=False, extract_filetype=filetype
+                    )
+                except Exception:
+                    error_msg = "%s is not a valid file type." % filetype
+                    self.logger.error(error_msg)
+                    raise Exception(error_msg)
+            else:
+                # Need to try to determine the filetype based on the extension of the filename.
+                file_extension = os.path.splitext(filename)[1]
+                file_extension = file_extension.replace(".", "")
+                self.logger.info(
+                    "Trying to find record for file extension: %s" % file_extension
+                )
+                self.filetype = self.data_store.get_or_create_item(
+                    model=ExtractFileType,
+                    create=False,
+                    extract_filetype_code=file_extension,
+                )
 
-        self.extract = self.data_store.get_or_create_item(
-            model=Extract,
-            extract_filename=filename,
-            extract_location_id=self.location.location.location_id,
-            extract_compression_type_id=self.compression_type_id,
-            extract_filetype_id=self.filetype.extract_filetype_id,
-        )
+            self.logger.info("Registering extract.")
 
-        if location_path is not None:
-            self.logger.info(
-                "Location path was provided so building file path from it."
+            self.extract = self.data_store.get_or_create_item(
+                model=Extract,
+                extract_filename=filename,
+                extract_location_id=self.location.location.location_id,
+                extract_compression_type_id=self.compression_type_id,
+                extract_filetype_id=self.filetype.extract_filetype_id,
             )
 
-            self.full_filename = str(Path(location_path).joinpath(filename))
-        else:
-            self.logger.info("Location provided so building file path from it.")
+            self.full_filename = self.get_full_filename(location_path=location_path)
 
-            self.full_filename = str(
-                Path(self.location.location_path).joinpath(
-                    self.extract.extract_filename
+            if self.process_run.dataset_types is not None:
+                self.logger.info("Associating dataset type(s) with extract.")
+                self.dataset_types = self.register_extract_dataset_types(
+                    dataset_types=self.process_run.dataset_types
                 )
-            )
+            else:
+                self.dataset_types = None
 
-        if self.process_run.dataset_types is not None:
-            self.logger.info("Associating dataset type(s) with extract.")
-            self.dataset_types = self.register_extract_dataset_types(
-                dataset_types=self.process_run.dataset_types
-            )
-        else:
-            self.dataset_types = None
-
-        # Getting all status types in the event there are custom status types added later.
-        self.extract_status_types = self.get_extract_status_types()
-
-        # For specific status types, need to retrieve their ids to be used for those status types' logic.
-
-        self.extract_status_initializing = self.extract_status_types["initializing"]
-        self.extract_status_ready = self.extract_status_types["ready"]
-        self.extract_status_loading = self.extract_status_types["loading"]
-        self.extract_status_loaded = self.extract_status_types["loaded"]
-        self.extract_status_archived = self.extract_status_types["archived"]
-        self.extract_status_deleted = self.extract_status_types["deleted"]
-        self.extract_status_error = self.extract_status_types["error"]
+            self.extract_process = self.retrieve_extract_process()
 
-        self.extract_process = self.retrieve_extract_process()
+            if status is not None:
+                self.logger.info("Status was provided by user.")
+                self.change_extract_status(new_status=status)
+            else:
+                self.logger.info("Status was not provided.  Initializing.")
+                self.extract.extract_status_id = self.extract_status_initializing
 
-        if status is not None:
-            self.logger.info("Status was provided by user.")
-            self.change_extract_status(new_status=status)
-        else:
-            self.logger.info("Status was not provided.  Initializing.")
-            self.extract.extract_status_id = self.extract_status_initializing
-
-        self.session.commit()
+            self.session.commit()
 
     def add_dependency(self, dependency_type, dependency):
         """
@@ -330,6 +351,21 @@ def extract_dependency_check(self, extracts=None):
         else:
             return False
 
+    def get_dataset_types(self):
+        """
+        Look up the DatasetType record for every dataset type associated with this extract.
+        :return: list of DatasetType records, in the order of self.extract.extract_dataset_types.
+        """
+        # create=False keeps this a pure lookup; `association` avoids shadowing the builtin `type`.
+        return [
+            self.data_store.get_or_create_item(
+                model=DatasetType,
+                create=False,
+                dataset_type_id=association.dataset_type_id,
+            )
+            for association in self.extract.extract_dataset_types
+        ]
+
     def get_extract_status_types(self):
         """
         Get list of process status types and return dictionary.
@@ -344,6 +380,32 @@ def get_extract_status_types(self):
 
         return status_types
 
+    def get_full_filename(self, location_path=None):
+        """
+        Join the directory the extract lives in with its filename.
+
+        An explicitly supplied location_path is joined with self.filename; without one, the path stored on
+        self.location is joined with the filename recorded on self.extract.
+        :param location_path: Directory to build the path from.  When omitted, self.location.location_path
+        is used instead.
+        :type location_path: str
+        :return: full filepath as string
+        """
+        if location_path is None:
+            self.logger.info("Location provided so building file path from it.")
+
+            directory = self.location.location_path
+            name = self.extract.extract_filename
+        else:
+            self.logger.info(
+                "Location path was provided so building file path from it."
+            )
+
+            directory = location_path
+            name = self.filename
+
+        return str(Path(directory) / name)
+
     def register_extract_dataset_types(self, dataset_types):
         """
         For the provided dataset types from process_run instance, associate with given Extract instance.
diff --git a/process_tracker/models/extract.py b/process_tracker/models/extract.py
@@ -129,6 +129,8 @@ class Extract(Base):
         nullable=True,
     )
 
+    compression_type = relationship("ExtractCompressionType")
+    extract_filetype = relationship("ExtractFileType")
     extract_dataset_types = relationship(
         "ExtractDatasetType",
         back_populates="dataset_type_extracts",
diff --git a/tests/test_extract_tracker.py b/tests/test_extract_tracker.py
@@ -606,3 +606,57 @@ def test_set_file_type_invalid(self):
             )
 
         self.assertTrue("zap is not a valid file type" in str(context.exception))
+
+    def test_extract_tracker_with_extract_id(self):
+        """
+        Given the extract_id of an already registered extract, ExtractTracker should rebuild its state from the
+        stored record rather than registering a new extract.
+        :return:
+        """
+        original = self.extract
+
+        rebuilt = ExtractTracker(
+            extract_id=original.extract.extract_id, process_run=self.process_run
+        )
+
+        # Gather every (expected, given) pair up front, then compare them in the same order.
+        comparisons = [
+            (original.extract.extract_filename, rebuilt.extract.extract_filename),
+            (original.location.location_name, rebuilt.location.location_name),
+            (original.compression_type_id, rebuilt.compression_type_id),
+            (original.extract.extract_filetype, rebuilt.extract.extract_filetype),
+            (original.full_filename, rebuilt.full_filename),
+            (original.dataset_types, rebuilt.dataset_types),
+            (
+                original.extract_process.process_tracking_id,
+                rebuilt.extract_process.process_tracking_id,
+            ),
+        ]
+
+        for expected, given in comparisons:
+            self.assertEqual(expected, given)
+
+    def test_ensure_nulls_caught_on_instantiation(self):
+        """
+        Now that an extract_id can be supplied, filename has to be optional in the signature.  When
+        ExtractTracker is instantiated with neither a filename nor an extract_id, an error should
+        be raised.
+        :return:
+        """
+        with self.assertRaises(Exception) as context:
+            ExtractTracker(process_run=self.extract.process_run)
+
+        # assertIn reports both strings on failure; a test method has no reason to return a value.
+        self.assertIn("Filename must be provided.", str(context.exception))

Original file line number	Diff line number	Diff line change
`@@ -129,6 +129,8 @@ class Extract(Base):`
`129`	`129`	`nullable=True,`
`130`	`130`	`)`
`131`	`131`
	`132`	`+ compression_type = relationship("ExtractCompressionType")`
	`133`	`+ extract_filetype = relationship("ExtractFileType")`
`132`	`134`	`extract_dataset_types = relationship(`
`133`	`135`	`"ExtractDatasetType",`
`134`	`136`	`back_populates="dataset_type_extracts",`