Wrote tests for ExtractTracking

OpenDataAlex · OpenDataAlex · commit 695f5e478345 · 2019-05-12T12:51:29.000-04:00
✅ Wrote tests for ExtractTracking
🐛 Debugged issue with ProcessTracking initialization

While testing ExtractTracking, I figured out that the ProcessTracking module
should in fact initialize a new run itself instead of making the user do so.
This way, when the user calls ProcessTracker, it will automatically try to
initialize a new run.
diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/process_tracker/extract_tracking.py b/process_tracker/extract_tracking.py
@@ -43,8 +43,6 @@ def __init__(self, process_run, filename, location_path, location_name=None):
                                                      , extract_location_id=self.location.location_id
                                                      , extract_source_id=self.source.source_id)
 
-        self.extract_process = self.retrieve_extract_process()
-
         # Getting all status types in the event there are custom status types added later.
         self.extract_status_types = self.get_extract_status_types()
 
@@ -58,6 +56,8 @@ def __init__(self, process_run, filename, location_path, location_name=None):
         self.extract_status_deleted = self.extract_status_types['deleted']
         self.extract_status_error = self.extract_status_types['error']
 
+        self.extract_process = self.retrieve_extract_process()
+
     def change_extract_status(self, new_status):
         """
         Change an extract record status.
@@ -66,10 +66,10 @@ def change_extract_status(self, new_status):
         status_date = datetime.now()
         new_status = self.extract_status_types[new_status]
 
-        if self.extract_status_types[new_status]:
+        if new_status:
             self.extract.extract_status_id = new_status
 
-            self.extract_process.extract_status_id = new_status
+            self.extract_process.extract_process_status_id = new_status
             self.extract_process.extract_process_event_date_time = status_date
 
             session.commit()
@@ -88,16 +88,19 @@ def derive_location_name(location_path):
         # Idea is to generalize things like grabbing the last directory name in the path,
         # what type of path is it (normal, s3, etc.)
 
+        location_prefix = None
+        location_name = ""
+
         location_path = location_path.lower()  # Don't care about casing.
 
         if "s3" in location_path:
             # If the path is an S3 Bucket, prefix to name.
 
             location_prefix = "s3"
-        else:
-            location_prefix = ""
 
-        location_name = location_prefix + " - "
+        if location_prefix is not None:
+
+            location_name = location_prefix + " - "
 
         location_name += basename(normpath(location_path))
 
@@ -124,7 +127,8 @@ def retrieve_extract_process(self):
 
         extract_process = self.data_store.get_or_create(model=ExtractProcess
                                                         , extract_tracking_id=self.extract.extract_id
-                                                        , process_tracking_id=self.process_run.process_tracking_id)
+                                                        , process_tracking_id=self.process_run.process_tracking_run
+                                                                                              .process_tracking_id)
 
         # Only need to set to 'initializing' when it's the first time a process run is trying to work with files.
         if extract_process.extract_process_status_id is None:
diff --git a/process_tracker/process_tracking.py b/process_tracker/process_tracking.py
@@ -41,7 +41,6 @@ def __init__(self, process_name, process_type, actor_name, tool_name, source_nam
                                                      , process_tool_id=self.tool.tool_id)
 
         self.process_name = process_name
-        self.process_tracking_run = ProcessTracking()
 
         # Getting all status types in the event there are custom status types added later.
         self.process_status_types = self.get_process_status_types()
@@ -51,6 +50,8 @@ def __init__(self, process_name, process_type, actor_name, tool_name, source_nam
         self.process_status_complete = self.process_status_types['completed']
         self.process_status_failed = self.process_status_types['failed']
 
+        self.process_tracking_run = self.register_new_process_run()
+
     def change_run_status(self, new_status, end_date=None):
         """
         Change a process tracking run record from 'running' to another status.
@@ -241,12 +242,12 @@ def register_new_process_run(self):
                                       , process_run_id=new_run_id
                                       , process_run_start_date_time=datetime.now()
                                       , process_run_actor_id=self.actor.actor_id
-                                      , is_latest_run = True)
+                                      , is_latest_run=True)
 
             session.add(new_run)
             session.commit()
 
-            self.process_tracking_run = new_run
+            return new_run
 
             self.logger.info('Process tracking record added for %s' % self.process_name)
 
diff --git a/tests/test_extract_tracking.py b/tests/test_extract_tracking.py
@@ -3,7 +3,7 @@
 from datetime import datetime
 import unittest
 
-from models.extract import Extract, ExtractProcess, ExtractStatus
+from models.extract import Extract, ExtractProcess, ExtractStatus, Location
 from models.process import Process, ProcessTracking
 
 from process_tracker import session
@@ -21,12 +21,12 @@ def setUpClass(cls):
                                              , tool_name='Spark'
                                              , source_name='Unittests')
 
-        cls.process_tracker.register_new_process_run()
-
-        cls.process_run = cls.process_tracker.process_tracking_run
+        cls.process_run = cls.process_tracker
 
     @classmethod
     def tearDownClass(cls):
+
+        session.query(ProcessTracking).delete()
         session.query(Process).delete()
         session.commit()
 
@@ -46,9 +46,9 @@ def tearDown(self):
         Need to clean up tables to return them to pristine state for other tests.
         :return:
         """
-        session.query(ProcessTracking).delete()
         session.query(ExtractProcess).delete()
         session.query(Extract).delete()
+        session.query(Location).delete()
         session.commit()
 
     def test_change_extract_status(self):
@@ -69,4 +69,63 @@ def test_change_extract_status(self):
         expected_result = [self.extract.extract_status_ready
                            , self.extract.extract_status_ready]
 
-        self.assertEqual(given_result, expected_result)
+        self.assertEqual(expected_result, given_result)
+
+    def test_change_extract_status_initialization(self):
+        """
+        Testing that when the extract is first being worked on by a process, the status is set to 'initializing'
+        :return:
+        """
+        extract_id = self.extract.extract.extract_id
+        self.extract.retrieve_extract_process()
+
+        extract_process_record = session.query(ExtractProcess).filter(ExtractProcess.extract_tracking_id == extract_id)
+
+        given_result = extract_process_record[0].extract_process_status_id
+        expected_result = self.extract.extract_status_initializing
+
+        self.assertEqual(expected_result, given_result)
+
+    def test_derive_location_name_local_path(self):
+        """
+        Testing that if a location name is not provided, one is created from the local path provided.
+        :return:
+        """
+        extract = ExtractTracker(process_run=self.process_run
+                                      , filename='test_extract_filename2.csv'
+                                      , location_path='/home/test/extract_dir2')
+
+        location = session.query(Location).filter(Location.location_id == extract.extract.extract_location_id)
+
+        given_result = location[0].location_name
+        expected_result = 'extract_dir2'
+
+        self.assertEqual(expected_result, given_result)
+
+    def test_derive_location_name_s3(self):
+        """
+        Testing that if a location name is not provided, one is created from the s3 path provided.
+        :return:
+        """
+        extract = ExtractTracker(process_run=self.process_run
+                                 , filename='test_extract_filename2.csv'
+                                 , location_path='https://test-test.s3.amazonaws.com/test/extract_dir')
+
+        location = session.query(Location).filter(Location.location_id == extract.extract.extract_location_id)
+
+        given_result = location[0].location_name
+        expected_result = 's3 - extract_dir'
+
+        self.assertEqual(expected_result, given_result)
+
+    def test_location_name_provided(self):
+        """
+        Testing that if a location name is provided (like with default extract), one is not created.
+        :return:
+        """
+        location = session.query(Location).filter(Location.location_id == self.extract.extract.extract_location_id)
+
+        given_result = location[0].location_name
+        expected_result = 'Test Location'
+
+        self.assertEqual(expected_result, given_result)
diff --git a/tests/test_process_tracking.py b/tests/test_process_tracking.py
@@ -12,14 +12,6 @@
 
 class TestProcessTracking(unittest.TestCase):
 
-    @classmethod
-    def setUpClass(cls):
-        cls.process_tracker = ProcessTracker(process_name='Testing Process Tracking Initialization'
-                                             , process_type='Extract'
-                                             , actor_name='UnitTesting'
-                                             , tool_name='Spark'
-                                             , source_name='Unittests')
-
     @classmethod
     def tearDownClass(cls):
         session.query(Process).delete()
@@ -30,8 +22,11 @@ def setUp(self):
         Creating an initial process tracking run record for testing.
         :return:
         """
-
-        self.process_tracker.register_new_process_run()
+        self.process_tracker = ProcessTracker(process_name='Testing Process Tracking Initialization'
+                                             , process_type='Extract'
+                                             , actor_name='UnitTesting'
+                                             , tool_name='Spark'
+                                             , source_name='Unittests')
 
         self.process_id = self.process_tracker.process.process_id
         self.provided_end_date = datetime.now()
@@ -55,7 +50,7 @@ def test_verify_session_variables_postgresql(self):
         given_result = data_store_type
         expected_result = 'postgresql'
 
-        self.assertEqual(given_result, expected_result)
+        self.assertEqual(expected_result, given_result)
 
     def test_initializing_process_tracking(self):
         """
@@ -65,7 +60,7 @@ def test_initializing_process_tracking(self):
         given_result = self.process_tracker.actor.actor_name
         expected_result = 'UnitTesting'
 
-        self.assertEqual(given_result, expected_result)
+        self.assertEqual(expected_result, given_result)
 
     def test_register_new_process_run(self):
         """
@@ -77,7 +72,7 @@ def test_register_new_process_run(self):
         given_result = session.query(ProcessTracking).filter(ProcessTracking.process_id == self.process_id).count()
         expected_result = 1
 
-        self.assertEqual(given_result, expected_result)
+        self.assertEqual(expected_result, given_result)
 
     def test_register_new_process_run_exception(self):
         """
@@ -110,7 +105,7 @@ def test_register_new_process_run_with_previous_run(self):
         given_result = process_runs[0].is_latest_run
         expected_result = False
 
-        self.assertEqual(given_result, expected_result)
+        self.assertEqual(expected_result, given_result)
 
     def test_change_run_status_complete(self):
         """
@@ -124,7 +119,7 @@ def test_change_run_status_complete(self):
         given_result = run_record[0].process_status_id
         expected_result = self.process_tracker.process_status_complete
 
-        self.assertEqual(given_result, expected_result)
+        self.assertEqual(expected_result, given_result)
 
     def test_change_run_status_failed(self):
         """
@@ -138,7 +133,7 @@ def test_change_run_status_failed(self):
         given_result = run_record[0].process_status_id
         expected_result = self.process_tracker.process_status_failed
 
-        self.assertEqual(given_result, expected_result)
+        self.assertEqual(expected_result, given_result)
 
     def test_change_run_status_with_end_date(self):
         """
@@ -153,7 +148,7 @@ def test_change_run_status_with_end_date(self):
         given_result = run_record[0].process_run_end_date_time
         expected_result = self.provided_end_date
 
-        self.assertEqual(given_result, expected_result)
+        self.assertEqual(expected_result, given_result)
 
     def test_raise_run_error_type_exists_no_fail(self):
         """
@@ -172,7 +167,7 @@ def test_raise_run_error_type_exists_no_fail(self):
 
         expected_result = 1
 
-        self.assertEqual(given_result, expected_result)
+        self.assertEqual(expected_result, given_result)
 
     def test_raise_run_error_type_not_exists(self):
         """
@@ -219,7 +214,7 @@ def test_raise_run_error_with_fail(self):
                            , self.provided_end_date]
 
         with self.subTest():
-            self.assertEqual(given_result, expected_result)
+            self.assertEqual(expected_result, given_result)
         with self.subTest():
             self.assertTrue('Process halting.  An error triggered the process to fail.' in str(context.exception))