Merge branches 'master' and 'process_tracker_python-139' of https://github.com/OpenDataAlex/process_tracker_python into process_tracker_python-139

Alex Meadows · Alex Meadows · commit aafcfd19c5d3 · 2019-11-22T10:43:51.000-05:00
� Conflicts:
�	dbscripts/postgresql_process_tracker.sql
diff --git a/dbscripts/mysql_process_tracker.sql b/dbscripts/mysql_process_tracker.sql
@@ -136,6 +136,19 @@ create table location_lkup
 		foreign key (location_type_id) references location_type_lkup (location_type_id)
 );
 
+create table process_tracker.filesize_type_lkup
+(
+	filesize_type_id int auto_increment
+		primary key,
+	filesize_type_name varchar(75) not null,
+	filesize_type_code char(2) not null,
+	constraint filesize_type_lkup_filesize_type_code_uindex
+		unique (filesize_type_code),
+	constraint filesize_type_lkup_filesize_type_name_uindex
+		unique (filesize_type_name),
+	constraint filesize_type_lkup_udx01
+		unique (filesize_type_code, filesize_type_name)
+);
 
 create table extract_tracking
 (
@@ -153,6 +166,8 @@ create table extract_tracking
 	extract_load_record_count int null comment 'The record count of the data set when loading the data file.',
 	extract_compression_type_id int null,
 	extract_filetype_id int null,
+	extract_filesize numeric null,
+	extract_filesize_type_id int null,
 	constraint extract_filename
 		unique (extract_filename),
 	constraint extract_tracking_fk03
@@ -162,7 +177,9 @@ create table extract_tracking
 	constraint extract_tracking_ibfk_1
 		foreign key (extract_location_id) references process_tracker.location_lkup (location_id),
 	constraint extract_tracking_ibfk_2
-		foreign key (extract_status_id) references process_tracker.extract_status_lkup (extract_status_id)
+		foreign key (extract_status_id) references process_tracker.extract_status_lkup (extract_status_id),
+	constraint extract_tracking_fk06
+        foreign key (extract_filesize_type_id) references process_tracker.filesize_type_lkup (filesize_type_id)
 );
 
 create index extract_location_id
@@ -534,6 +551,7 @@ create table source_object_attribute_lkup
 	default_value_number decimal null,
 	is_key boolean default FALSE not null,
 	is_filter boolean default FALSE not null,
+	is_partition boolean default FALSE not null,
 	constraint source_object_attribute_lkup_udx01
 		unique (source_object_id, source_object_attribute_name),
 	constraint source_object_attribute_lkup_fk01
diff --git a/dbscripts/mysql_process_tracker_defaults.sql b/dbscripts/mysql_process_tracker_defaults.sql
@@ -43,4 +43,9 @@ INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code,
 INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (5, 'gte', 'greater than or equal');
 INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (6, 'not', 'not equal');
 INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (7, 'lke', 'like');
-INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (8, 'in', 'in set');
+INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (8, 'in', 'in set');
+
+INSERT INTO process_tracker.filesize_type_lkup (filesize_type_id, filesize_type_name, filesize_type_code) VALUES (1, 'kilobytes', 'KB');
+INSERT INTO process_tracker.filesize_type_lkup (filesize_type_id, filesize_type_name, filesize_type_code) VALUES (2, 'megabytes', 'MB');
+INSERT INTO process_tracker.filesize_type_lkup (filesize_type_id, filesize_type_name, filesize_type_code) VALUES (3, 'gigabytes', 'GB');
+INSERT INTO process_tracker.filesize_type_lkup (filesize_type_id, filesize_type_name, filesize_type_code) VALUES (4, 'bytes', 'B ');
diff --git a/dbscripts/postgresql_process_tracker_defaults.sql b/dbscripts/postgresql_process_tracker_defaults.sql
@@ -42,4 +42,9 @@ INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code,
 INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (5, 'gte', 'greater than or equal');
 INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (6, 'not', 'not equal');
 INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (7, 'lke', 'like');
-INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (8, 'in', 'in set');
+INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (8, 'in', 'in set');
+
+INSERT INTO process_tracker.filesize_type_lkup (filesize_type_id, filesize_type_name, filesize_type_code) VALUES (1, 'kilobytes', 'KB');
+INSERT INTO process_tracker.filesize_type_lkup (filesize_type_id, filesize_type_name, filesize_type_code) VALUES (2, 'megabytes', 'MB');
+INSERT INTO process_tracker.filesize_type_lkup (filesize_type_id, filesize_type_name, filesize_type_code) VALUES (3, 'gigabytes', 'GB');
+INSERT INTO process_tracker.filesize_type_lkup (filesize_type_id, filesize_type_name, filesize_type_code) VALUES (4, 'bytes', 'B ');
diff --git a/process_tracker/extract_tracker.py b/process_tracker/extract_tracker.py
@@ -1,6 +1,7 @@
 # Extract Tracking
 # Used in the creation and editing of extract records.  Used in conjunction with process tracking.
 from datetime import datetime
+import itertools
 import logging
 import os
 from pathlib import Path
@@ -43,6 +44,7 @@ def __init__(
         filetype=None,
         config_location=None,
         extract_id=None,
+        file_size=None,
     ):
         """
         ExtractTracker is the primary engine for tracking data extracts
@@ -68,6 +70,8 @@ def __init__(
         :param extract_id: If trying to work with a specific extract that's in process, provide the id and it will be
         reconstructed.
         :type extract_id: int
+        :param file_size: The size of the file (i.e. 2.21MB)
+        :type file_size: str
         """
         log_level = SettingsManager(
             config_location=config_location
@@ -114,6 +118,18 @@ def __init__(
             self.extract_process = self.retrieve_extract_process()
             self.sources = self.extract.extract_sources
 
+            if (
+                self.extract.extract_filesize is not None
+                and self.extract.extract_filesize_type is not None
+            ):
+
+                self.file_size = "%s %s" % (
+                    self.extract.extract_filesize,
+                    self.extract.extract_filesize_type,
+                )
+            else:
+                self.file_size = None
+
         else:
             if filename is None:
                 error_msg = "Filename must be provided."
@@ -230,6 +246,14 @@ def __init__(
                 self.logger.info("Status was not provided.  Initializing.")
                 self.extract.extract_status_id = self.extract_status_initializing
 
+            if file_size is not None:
+                split_filesize = self.file_size_splitter(file_size=file_size)
+
+                self.extract.extract_filesize = split_filesize[0]
+                self.extract.extract_filesize_type = split_filesize[1]
+
+            self.file_size = file_size
+
             self.session.commit()
 
     def add_dependency(self, dependency_type, dependency):
@@ -382,6 +406,47 @@ def extract_dependency_check(self, extracts=None):
         else:
             return False
 
+    def file_size_splitter(self, file_size):
+        """
+        Take provided file size and split the amount from the measure.
+        :param file_size: The provided file size with measure (i.e. 2.2GB)
+        :type file_size: str
+        :return:
+        """
+        file_size_split = dict()
+
+        amount = "".join(itertools.takewhile(str.isdigit, file_size))
+        self.logger.debug("Amount is now: %s" % amount)
+        measure = "".join([i for i in file_size if not i.isdigit() and i != "."])
+        self.logger.debug("Measure is: %s" % measure)
+        amount = int(amount)
+        self.logger.debug("Amount is now: %s" % amount)
+        if (
+            measure == "bytes"
+            or measure.lower() == "b"
+            or measure == ""
+            or measure is None
+        ):
+            amount = amount / 1048576  # converting bytes to gb
+            measure = "GB"
+        elif measure.lower() == "mb":
+
+            measure = "MB"
+        elif measure.lower() == "gb":
+            measure = "GB"
+        else:
+            error_msg = (
+                "Unsupported measure detected. Please provide file size in bytes, MB, or GB.  "
+                "Measure provided was: %s" % measure
+            )
+            self.logger.error(error_msg)
+            raise Exception(error_msg)
+
+        file_size_split[0] = amount
+        file_size_split[1] = measure
+
+        return file_size_split
+
     def get_dataset_types(self):
         """
         Get list of dataset types associated to extract and return list.
diff --git a/process_tracker/models/extract.py b/process_tracker/models/extract.py
@@ -9,6 +9,7 @@
     DateTime,
     ForeignKey,
     Integer,
+    Numeric,
     Sequence,
     String,
     UniqueConstraint,
@@ -128,9 +129,16 @@ class Extract(Base):
         ForeignKey("process_tracker.extract_filetype_lkup.extract_filetype_id"),
         nullable=True,
     )
+    extract_filesize = Column(Numeric, nullable=True)
+    extract_filesize_type_id = Column(
+        Integer,
+        ForeignKey("process_tracker.filesize_type_lkup.filesize_type_id"),
+        nullable=True,
+    )
 
     compression_type = relationship("ExtractCompressionType")
     extract_filetype = relationship("ExtractFileType")
+    extract_filesize_type = relationship("FileSizeType")
     extract_dataset_types = relationship(
         "ExtractDatasetType",
         back_populates="dataset_type_extracts",
@@ -317,6 +325,30 @@ def __repr__(self):
         )
 
 
+class FileSizeType(Base):
+    __tablename__ = "filesize_type_lkup"
+    __table_args__ = {"schema": "process_tracker"}
+
+    filesize_type_id = Column(
+        Integer,
+        Sequence("filesize_type_lkup_filesize_type_id_seq", schema="process_tracker"),
+        primary_key=True,
+        nullable=False,
+    )
+    filesize_type_name = Column(String(75), nullable=False, unique=True)
+    filesize_type_code = Column(String(2), nullable=False, unique=True)
+
+    UniqueConstraint(filesize_type_code, filesize_type_name)
+
+    def __repr__(self):
+
+        return "<FilesizeType id=%s, code=%s, name=%s>" % (
+            self.filesize_type_id,
+            self.filesize_type_code,
+            self.filesize_type_name,
+        )
+
+
 class LocationType(Base):
 
     __tablename__ = "location_type_lkup"
diff --git a/process_tracker/models/source.py b/process_tracker/models/source.py
@@ -318,6 +318,7 @@ class SourceObjectAttribute(Base):
     default_value_number = Column(Numeric, nullable=True)
     is_key = Column(Boolean, nullable=False, default=False)
     is_filter = Column(Boolean, nullable=False, default=False)
+    is_partition = Column(Boolean, nullable=False, default=False)
 
     UniqueConstraint(source_object_id, source_object_attribute_name)
 
diff --git a/process_tracker/process_tracker.py b/process_tracker/process_tracker.py
@@ -703,6 +703,7 @@ def find_process_source_attributes(self, process):
                 SourceObjectAttribute.source_object_attribute_name,
                 SourceObjectAttribute.is_key,
                 SourceObjectAttribute.is_filter,
+                SourceObjectAttribute.is_partition,
             )
             .join(SourceObject, SourceObjectAttribute.source_objects)
             .join(Source, SourceObject.sources)
@@ -723,6 +724,7 @@ def find_process_source_attributes(self, process):
                     "source_object_attribute_name": attribute.source_object_attribute_name,
                     "is_key": attribute.is_key,
                     "is_filter": attribute.is_filter,
+                    "is_partition": attribute.is_partition,
                 }
             )
 
@@ -744,6 +746,7 @@ def find_process_target_attributes(self, process):
                 SourceObjectAttribute.source_object_attribute_name,
                 SourceObjectAttribute.is_key,
                 SourceObjectAttribute.is_filter,
+                SourceObjectAttribute.is_partition,
             )
             .join(SourceObject, SourceObjectAttribute.source_objects)
             .join(Source, SourceObject.sources)
@@ -764,6 +767,7 @@ def find_process_target_attributes(self, process):
                     "target_object_attribute_name": attribute.source_object_attribute_name,
                     "is_key": attribute.is_key,
                     "is_filter": attribute.is_filter,
+                    "is_partition": attribute.is_partition,
                 }
             )
 
diff --git a/tests/test_extract_tracker.py b/tests/test_extract_tracker.py
@@ -754,3 +754,61 @@ def test_ensure_nulls_caught_on_instantiation(self):
             ExtractTracker(process_run=self.extract.process_run)
 
         return self.assertTrue("Filename must be provided." in str(context.exception))
+
+    def test_file_size_splitter_bytes(self):
+        """
+        Testing that if provided a file size in bytes, the file size in GB will be returned.
+        :return:
+        """
+        given_result = self.extract.file_size_splitter(file_size="1048576B")
+        given_result = {given_result[0], given_result[1]}
+        expected_result = {1, "GB"}
+
+        self.assertEqual(expected_result, given_result)
+
+    def test_file_size_splitter_mb(self):
+        """
+        Testing that if provided a file size in MB, the file size will not be modified.
+        :return:
+        """
+        given_result = self.extract.file_size_splitter(file_size="1024MB")
+        given_result = {given_result[0], given_result[1]}
+        expected_result = {1024, "MB"}
+
+        self.assertEqual(expected_result, given_result)
+
+    def test_file_size_splitter_gb(self):
+        """
+        Testing that if provided a file size in GB, the file size will not be modified.
+        :return:
+        """
+        given_result = self.extract.file_size_splitter(file_size="50GB")
+        given_result = {given_result[0], given_result[1]}
+        expected_result = {50, "GB"}
+
+        self.assertEqual(expected_result, given_result)
+
+    def test_file_size_splitter_no_measure(self):
+        """
+        Testing that if provided a file size without a measure, the file size will be assumed to be bytes and returned
+        in GB.
+        :return:
+        """
+        given_result = self.extract.file_size_splitter(file_size="1048576")
+        given_result = {given_result[0], given_result[1]}
+        expected_result = {1, "GB"}
+
+        self.assertEqual(expected_result, given_result)
+
+    def test_file_size_splitter_invalid_measure(self):
+        """
+        Testing that if provided a file size with an invalid measure, an exception will be thrown.
+        :return:
+        """
+        with self.assertRaises(Exception) as context:
+            self.extract.file_size_splitter(file_size="1024ZXB")
+
+        self.assertTrue(
+            "Unsupported measure detected. Please provide file size in bytes, MB, or GB.  Measure provided was: ZXB"
+            in str(context.exception)
+        )
diff --git a/tests/test_process_tracker.py b/tests/test_process_tracker.py
@@ -2082,13 +2082,15 @@ def test_find_process_source_attributes(self):
                 "source_object_attribute_name": "attr_1",
                 "is_key": False,
                 "is_filter": False,
+                "is_partition": False,
             },
             {
                 "source_name": "source",
                 "source_object_name": "source_table",
                 "source_object_attribute_name": "attr_2",
                 "is_key": False,
                 "is_filter": False,
+                "is_partition": False,
             },
         ]
 
@@ -2115,13 +2117,15 @@ def test_find_process_target_attributes(self):
                 "target_object_attribute_name": "attr_1",
                 "is_key": False,
                 "is_filter": False,
+                "is_partition": False,
             },
             {
                 "target_name": "target",
                 "target_object_name": "target_table",
                 "target_object_attribute_name": "attr_2",
                 "is_key": False,
                 "is_filter": False,
+                "is_partition": False,
             },
         ]
 

Original file line number	Diff line number	Diff line change
`@@ -703,6 +703,7 @@ def find_process_source_attributes(self, process):`
`703`	`703`	`SourceObjectAttribute.source_object_attribute_name,`
`704`	`704`	`SourceObjectAttribute.is_key,`
`705`	`705`	`SourceObjectAttribute.is_filter,`
	`706`	`+ SourceObjectAttribute.is_partition,`
`706`	`707`	`)`
`707`	`708`	`.join(SourceObject, SourceObjectAttribute.source_objects)`
`708`	`709`	`.join(Source, SourceObject.sources)`
`@@ -723,6 +724,7 @@ def find_process_source_attributes(self, process):`
`723`	`724`	`"source_object_attribute_name": attribute.source_object_attribute_name,`
`724`	`725`	`"is_key": attribute.is_key,`
`725`	`726`	`"is_filter": attribute.is_filter,`
	`727`	`+ "is_partition": attribute.is_partition,`
`726`	`728`	`}`
`727`	`729`	`)`
`728`	`730`
`@@ -744,6 +746,7 @@ def find_process_target_attributes(self, process):`
`744`	`746`	`SourceObjectAttribute.source_object_attribute_name,`
`745`	`747`	`SourceObjectAttribute.is_key,`
`746`	`748`	`SourceObjectAttribute.is_filter,`
	`749`	`+ SourceObjectAttribute.is_partition,`
`747`	`750`	`)`
`748`	`751`	`.join(SourceObject, SourceObjectAttribute.source_objects)`
`749`	`752`	`.join(Source, SourceObject.sources)`
`@@ -764,6 +767,7 @@ def find_process_target_attributes(self, process):`
`764`	`767`	`"target_object_attribute_name": attribute.source_object_attribute_name,`
`765`	`768`	`"is_key": attribute.is_key,`
`766`	`769`	`"is_filter": attribute.is_filter,`
	`770`	`+ "is_partition": attribute.is_partition,`
`767`	`771`	`}`
`768`	`772`	`)`
`769`	`773`