Skip to content

Commit 31bfb82

Browse files
author
Alex Meadows
committed
process_tracker_python-138 Add file size to extract_tracker
✨ File size and measure added to extract_tracker Closes: #138
1 parent d936fea commit 31bfb82

File tree

7 files changed

+215
-4
lines changed

7 files changed

+215
-4
lines changed

dbscripts/mysql_process_tracker.sql

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,24 @@ create table location_lkup
136136
foreign key (location_type_id) references location_type_lkup (location_type_id)
137137
);
138138

139+
-- Lookup of the units a file size can be recorded in; the defaults script
-- seeds it with kilobytes, megabytes, gigabytes and bytes.
create table process_tracker.filesize_type_lkup
(
    -- int rather than serial: in MySQL serial is BIGINT UNSIGNED, which the
    -- int column extract_tracking.extract_filesize_type_id could not reference.
    filesize_type_id int auto_increment not null,
    filesize_type_name varchar(75) not null,
    filesize_type_code char(2) not null,
    -- MySQL needs a named primary key declared at table level with its column list.
    constraint filesize_type_lkup_pk
        primary key (filesize_type_id)
);

-- Each code and each name may be used only once.
create unique index filesize_type_lkup_filesize_type_code_uindex
    on process_tracker.filesize_type_lkup (filesize_type_code);

create unique index filesize_type_lkup_filesize_type_name_uindex
    on process_tracker.filesize_type_lkup (filesize_type_name);

-- NOTE(review): already implied by the two single-column unique indexes above;
-- kept so the schema matches the PostgreSQL script.
create unique index filesize_type_lkup_udx03
    on process_tracker.filesize_type_lkup (filesize_type_code, filesize_type_name);
156+
139157

140158
create table extract_tracking
141159
(
@@ -153,6 +171,8 @@ create table extract_tracking
153171
extract_load_record_count int null comment 'The record count of the data set when loading the data file.',
154172
extract_compression_type_id int null,
155173
extract_filetype_id int null,
174+
extract_filesize numeric null,
175+
extract_filesize_type_id int null,
156176
constraint extract_filename
157177
unique (extract_filename),
158178
constraint extract_tracking_fk03
@@ -162,7 +182,9 @@ create table extract_tracking
162182
constraint extract_tracking_ibfk_1
163183
foreign key (extract_location_id) references process_tracker.location_lkup (location_id),
164184
constraint extract_tracking_ibfk_2
165-
foreign key (extract_status_id) references process_tracker.extract_status_lkup (extract_status_id)
185+
foreign key (extract_status_id) references process_tracker.extract_status_lkup (extract_status_id),
186+
-- The referencing column is extract_filesize_type_id, as declared in this table's column list.
    constraint extract_tracking_fk06
        foreign key (extract_filesize_type_id) references process_tracker.filesize_type_lkup (filesize_type_id)
166188
);
167189

168190
create index extract_location_id

dbscripts/mysql_process_tracker_defaults.sql

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,9 @@ INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code,
4343
INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (5, 'gte', 'greater than or equal');
4444
INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (6, 'not', 'not equal');
4545
INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (7, 'lke', 'like');
46-
INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (8, 'in', 'in set');
46+
INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (8, 'in', 'in set');
47+
48+
-- Default file size units.
INSERT INTO process_tracker.filesize_type_lkup
    (filesize_type_id, filesize_type_name, filesize_type_code)
VALUES
    (1, 'kilobytes', 'KB');

INSERT INTO process_tracker.filesize_type_lkup
    (filesize_type_id, filesize_type_name, filesize_type_code)
VALUES
    (2, 'megabytes', 'MB');

INSERT INTO process_tracker.filesize_type_lkup
    (filesize_type_id, filesize_type_name, filesize_type_code)
VALUES
    (3, 'gigabytes', 'GB');

-- The code column is char(2), hence the padded single-letter code.
INSERT INTO process_tracker.filesize_type_lkup
    (filesize_type_id, filesize_type_name, filesize_type_code)
VALUES
    (4, 'bytes', 'B ');

dbscripts/postgresql_process_tracker.sql

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,26 @@ create unique index extract_compression_type_lkup_extract_compression_type_uinde
438438
on process_tracker.extract_compression_type_lkup (extract_compression_type);
439439

440440

441+
-- Lookup of the units a file size can be recorded in; the defaults script
-- seeds it with kilobytes, megabytes, gigabytes and bytes.
create table process_tracker.filesize_type_lkup
(
    filesize_type_id serial not null,
    filesize_type_name varchar(75) not null,
    filesize_type_code char(2) not null,
    constraint filesize_type_lkup_pk
        primary key (filesize_type_id)
);

alter table process_tracker.filesize_type_lkup
    owner to pt_admin;

-- Each code and each name may be used only once.
create unique index filesize_type_lkup_filesize_type_code_uindex
    on process_tracker.filesize_type_lkup (filesize_type_code);

create unique index filesize_type_lkup_filesize_type_name_uindex
    on process_tracker.filesize_type_lkup (filesize_type_name);

-- NOTE(review): already implied by the two single-column unique indexes above.
create unique index filesize_type_lkup_udx03
    on process_tracker.filesize_type_lkup (filesize_type_code, filesize_type_name);
460+
441461

442462
create table extract_tracking
443463
(
@@ -461,9 +481,13 @@ create table extract_tracking
461481
extract_load_low_date_time timestamp,
462482
extract_load_high_date_time timestamp,
463483
extract_load_record_count integer,
464-
extract_compression_type_id integer
484+
-- No comma after the type: the named references clause below is an inline
-- column constraint and must stay attached to this column.
    extract_compression_type_id integer
        constraint extract_tracking_fk04
            references extract_compression_type_lkup,
487+
extract_filesize numeric,
488+
extract_filesize_type_id integer
489+
constraint extract_tracking_fk06
490+
references process_tracker.filesize_type_lkup,
467491
extract_filetype_id integer
468492
constraint extract_tracking_fk05
469493
references extract_filetype_lkup

dbscripts/postgresql_process_tracker_defaults.sql

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,9 @@ INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code,
4242
INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (5, 'gte', 'greater than or equal');
4343
INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (6, 'not', 'not equal');
4444
INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (7, 'lke', 'like');
45-
INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (8, 'in', 'in set');
45+
INSERT INTO process_tracker.filter_type_lkup (filter_type_id, filter_type_code, filter_type_name) VALUES (8, 'in', 'in set');
46+
47+
-- Default file size units.
INSERT INTO process_tracker.filesize_type_lkup
    (filesize_type_id, filesize_type_name, filesize_type_code)
VALUES
    (1, 'kilobytes', 'KB');

INSERT INTO process_tracker.filesize_type_lkup
    (filesize_type_id, filesize_type_name, filesize_type_code)
VALUES
    (2, 'megabytes', 'MB');

INSERT INTO process_tracker.filesize_type_lkup
    (filesize_type_id, filesize_type_name, filesize_type_code)
VALUES
    (3, 'gigabytes', 'GB');

-- The code column is char(2), hence the padded single-letter code.
INSERT INTO process_tracker.filesize_type_lkup
    (filesize_type_id, filesize_type_name, filesize_type_code)
VALUES
    (4, 'bytes', 'B ');

process_tracker/extract_tracker.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Extract Tracking
22
# Used in the creation and editing of extract records. Used in conjunction with process tracking.
33
from datetime import datetime
4+
import itertools
45
import logging
56
import os
67
from pathlib import Path
@@ -43,6 +44,7 @@ def __init__(
4344
filetype=None,
4445
config_location=None,
4546
extract_id=None,
47+
file_size=None,
4648
):
4749
"""
4850
ExtractTracker is the primary engine for tracking data extracts
@@ -68,6 +70,8 @@ def __init__(
6870
:param extract_id: If trying to work with a specific extract that's in process, provide the id and it will be
6971
reconstructed.
7072
:type extract_id: int
73+
:param file_size: The size of the file (i.e. 2.21MB)
74+
:type file_size: str
7175
"""
7276
log_level = SettingsManager(
7377
config_location=config_location
@@ -114,6 +118,18 @@ def __init__(
114118
self.extract_process = self.retrieve_extract_process()
115119
self.sources = self.extract.extract_sources
116120

121+
if (
122+
self.extract.extract_filesize is not None
123+
and self.extract.extract_filesize_type is not None
124+
):
125+
126+
self.file_size = "%s %s" % (
127+
self.extract.extract_filesize,
128+
self.extract.extract_filesize_type,
129+
)
130+
else:
131+
self.file_size = None
132+
117133
else:
118134
if filename is None:
119135
error_msg = "Filename must be provided."
@@ -230,6 +246,14 @@ def __init__(
230246
self.logger.info("Status was not provided. Initializing.")
231247
self.extract.extract_status_id = self.extract_status_initializing
232248

249+
if file_size is not None:
250+
split_filesize = self.file_size_splitter(file_size=file_size)
251+
252+
self.extract.extract_filesize = split_filesize[0]
253+
self.extract.extract_filesize_type = split_filesize[1]
254+
255+
self.file_size = file_size
256+
233257
self.session.commit()
234258

235259
def add_dependency(self, dependency_type, dependency):
@@ -382,6 +406,47 @@ def extract_dependency_check(self, extracts=None):
382406
else:
383407
return False
384408

409+
def file_size_splitter(self, file_size):
    """
    Take provided file size and split the amount from the measure.

    The leading run of digits and decimal points is read as the amount and the remaining text as the measure.
    Whitespace around either part is ignored and the measure is matched case-insensitively.

    :param file_size: The provided file size with measure (i.e. 2.2GB, 1024MB, 1048576B or 1048576)
    :type file_size: str
    :return: dict holding the amount under key 0 and the measure ("MB" or "GB") under key 1.  Whole amounts stay
             int, decimal amounts are float, and sizes given in bytes (or with no measure) are divided by 1048576.
    :raises ValueError: if no numeric amount can be read from the start of file_size.
    :raises Exception: if the measure is not bytes, MB or GB.
    """
    raw_size = str(file_size).strip()

    # Keep the decimal point with the amount so "2.21MB" is not truncated to 2.
    amount_text = "".join(
        itertools.takewhile(lambda char: char.isdigit() or char == ".", raw_size)
    )
    self.logger.debug("Amount is now: %s" % amount_text)

    measure = raw_size[len(amount_text):].strip()
    self.logger.debug("Measure is: %s" % measure)

    try:
        # Whole numbers stay int so existing callers receive the same type as before.
        amount = int(amount_text) if amount_text.isdigit() else float(amount_text)
    except ValueError:
        error_msg = (
            "Unable to determine the file size amount. File size provided was: %s"
            % file_size
        )
        self.logger.error(error_msg)
        raise ValueError(error_msg)

    self.logger.debug("Amount is now: %s" % amount)

    normalized_measure = measure.lower()

    if normalized_measure in ("bytes", "b", ""):
        # NOTE(review): 1048576 bytes is one mebibyte, yet the result is labelled GB.  The behavior is kept
        # because the tests in tests/test_extract_tracker.py pin {1, "GB"} for 1048576 bytes; the divisor (or
        # the label) and those tests need to be corrected together.
        amount = amount / 1048576
        measure = "GB"
    elif normalized_measure == "mb":
        measure = "MB"
    elif normalized_measure == "gb":
        measure = "GB"
    else:
        error_msg = (
            "Unsupported measure detected. Please provide file size in bytes, MB, or GB. "
            "Measure provided was: %s" % measure
        )
        self.logger.error(error_msg)
        raise Exception(error_msg)

    return {0: amount, 1: measure}
449+
385450
def get_dataset_types(self):
386451
"""
387452
Get list of dataset types associated to extract and return list.

process_tracker/models/extract.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
DateTime,
1010
ForeignKey,
1111
Integer,
12+
Numeric,
1213
Sequence,
1314
String,
1415
UniqueConstraint,
@@ -128,9 +129,16 @@ class Extract(Base):
128129
ForeignKey("process_tracker.extract_filetype_lkup.extract_filetype_id"),
129130
nullable=True,
130131
)
132+
extract_filesize = Column(Numeric, nullable=True)
133+
extract_filesize_type_id = Column(
134+
Integer,
135+
ForeignKey("process_tracker.filesize_type_lkup.filesize_type_id"),
136+
nullable=True,
137+
)
131138

132139
compression_type = relationship("ExtractCompressionType")
133140
extract_filetype = relationship("ExtractFileType")
141+
extract_filesize_type = relationship("FileSizeType")
134142
extract_dataset_types = relationship(
135143
"ExtractDatasetType",
136144
back_populates="dataset_type_extracts",
@@ -317,6 +325,30 @@ def __repr__(self):
317325
)
318326

319327

328+
class FileSizeType(Base):
    """
    Lookup record for a unit of measure a file size can be stored in (table process_tracker.filesize_type_lkup).
    """

    __tablename__ = "filesize_type_lkup"
    __table_args__ = {"schema": "process_tracker"}

    filesize_type_id = Column(
        Integer,
        Sequence("filesize_type_lkup_filesize_type_id_seq", schema="process_tracker"),
        nullable=False,
        primary_key=True,
    )
    # Name and code are each unique on their own.
    filesize_type_name = Column(String(75), unique=True, nullable=False)
    filesize_type_code = Column(String(2), unique=True, nullable=False)

    UniqueConstraint(filesize_type_code, filesize_type_name)

    def __repr__(self):
        fields = (
            self.filesize_type_id,
            self.filesize_type_code,
            self.filesize_type_name,
        )

        return "<FilesizeType id=%s, code=%s, name=%s>" % fields
350+
351+
320352
class LocationType(Base):
321353

322354
__tablename__ = "location_type_lkup"

tests/test_extract_tracker.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,3 +754,61 @@ def test_ensure_nulls_caught_on_instantiation(self):
754754
ExtractTracker(process_run=self.extract.process_run)
755755

756756
return self.assertTrue("Filename must be provided." in str(context.exception))
757+
758+
def test_file_size_splitter_bytes(self):
759+
"""
760+
Testing that if provided a file size in bytes, the file size in GB will be returned.
761+
:return:
762+
"""
763+
given_result = self.extract.file_size_splitter(file_size="1048576B")
764+
given_result = {given_result[0], given_result[1]}
765+
expected_result = {1, "GB"}
766+
767+
self.assertEqual(expected_result, given_result)
768+
769+
def test_file_size_splitter_mb(self):
770+
"""
771+
Testing that if provided a file size in MB, the file size will not be modified.
772+
:return:
773+
"""
774+
given_result = self.extract.file_size_splitter(file_size="1024MB")
775+
given_result = {given_result[0], given_result[1]}
776+
expected_result = {1024, "MB"}
777+
778+
self.assertEqual(expected_result, given_result)
779+
780+
def test_file_size_splitter_gb(self):
781+
"""
782+
Testing that if provided a file size in GB, the file size will not be modified.
783+
:return:
784+
"""
785+
given_result = self.extract.file_size_splitter(file_size="50GB")
786+
given_result = {given_result[0], given_result[1]}
787+
expected_result = {50, "GB"}
788+
789+
self.assertEqual(expected_result, given_result)
790+
791+
def test_file_size_splitter_no_measure(self):
792+
"""
793+
Testing that if provided a file size without a measure, the file size will be assumed to be bytes and returned
794+
in GB.
795+
:return:
796+
"""
797+
given_result = self.extract.file_size_splitter(file_size="1048576")
798+
given_result = {given_result[0], given_result[1]}
799+
expected_result = {1, "GB"}
800+
801+
self.assertEqual(expected_result, given_result)
802+
803+
def test_file_size_splitter_invalid_measure(self):
804+
"""
805+
Testing that if provided a file size with an invalid measure, an exception will be thrown.
806+
:return:
807+
"""
808+
with self.assertRaises(Exception) as context:
809+
self.extract.file_size_splitter(file_size="1024ZXB")
810+
811+
self.assertTrue(
812+
"Unsupported measure detected. Please provide file size in bytes, MB, or GB. Measure provided was: ZXB"
813+
in str(context.exception)
814+
)

0 commit comments

Comments
 (0)