Skip to content

Commit 28bdbf4

Browse files
authored
Merge pull request #96 from OpenDataAlex/process_tracker_python-83
Process tracker python 83
2 parents 45c4697 + 3d86faa commit 28bdbf4

File tree

8 files changed

+631
-404
lines changed

8 files changed

+631
-404
lines changed

dbscripts/mysql_process_tracker.sql

Lines changed: 184 additions & 167 deletions
Large diffs are not rendered by default.

dbscripts/mysql_process_tracker_defaults.sql

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,8 @@ INSERT INTO process_tracker.error_type_lkup (error_type_id, error_type_name) VAL
2020
INSERT INTO process_tracker.process_type_lkup (process_type_id, process_type_name) VALUES (default, 'Extract');
2121
INSERT INTO process_tracker.process_type_lkup (process_type_id, process_type_name) VALUES (default, 'Load');
2222

23-
INSERT INTO process_tracker.system_lkup (system_id, system_key, system_value) VALUES (default, 'version', '0.6.0');
23+
INSERT INTO process_tracker.system_lkup (system_id, system_key, system_value) VALUES (default, 'version', '0.7.0');
24+
25+
INSERT INTO process_tracker.extract_compression_type_lkup (extract_compression_type_id, extract_compression_type) VALUES (1, 'zip');
26+
27+
INSERT INTO process_tracker.extract_filetype_lkup (extract_filetype_id, extract_filetype_code, extract_filetype, delimiter_char, quote_char, escape_char) VALUES (1, 'csv', 'Comma Separated Values', ',', '"', '/');

dbscripts/postgresql_process_tracker.sql

Lines changed: 269 additions & 234 deletions
Large diffs are not rendered by default.

dbscripts/postgresql_process_tracker_defaults.sql

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,8 @@ INSERT INTO process_tracker.error_type_lkup (error_type_id, error_type_name) VAL
2121
INSERT INTO process_tracker.process_type_lkup (process_type_id, process_type_name) VALUES (default, 'Extract');
2222
INSERT INTO process_tracker.process_type_lkup (process_type_id, process_type_name) VALUES (default, 'Load');
2323

24-
INSERT INTO process_tracker.system_lkup (system_id, system_key, system_value) VALUES (default, 'version', '0.6.0');
24+
INSERT INTO process_tracker.system_lkup (system_id, system_key, system_value) VALUES (default, 'version', '0.7.0');
25+
26+
INSERT INTO process_tracker.extract_compression_type_lkup (extract_compression_type_id, extract_compression_type) VALUES (1, 'zip');
27+
28+
INSERT INTO process_tracker.extract_filetype_lkup (extract_filetype_id, extract_filetype_code, extract_filetype, delimiter_char, quote_char, escape_char) VALUES (1, 'csv', 'Comma Separated Values', ',', '"', '/');

process_tracker/extract_tracker.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Used in the creation and editing of extract records. Used in conjunction with process tracking.
33
from datetime import datetime
44
import logging
5+
import os
56
from pathlib import Path
67

78
from sqlalchemy.orm import aliased
@@ -11,6 +12,8 @@
1112
from process_tracker.utilities import utilities
1213
from process_tracker.models.extract import (
1314
Extract,
15+
ExtractCompressionType,
16+
ExtractFileType,
1417
ExtractDatasetType,
1518
ExtractDependency,
1619
ExtractProcess,
@@ -27,6 +30,8 @@ def __init__(
2730
location_name=None,
2831
location_path=None,
2932
status=None,
33+
compression_type=None,
34+
filetype=None,
3035
config_location=None,
3136
):
3237
"""
@@ -44,6 +49,10 @@ def __init__(
4449
:type location_name: string
4550
:param status: Optional if status does not need to be 'initializing', which is default.
4651
:type status: string
52+
:param compression_type: Optional compression format of the extract.
53+
:type compression_type: string
54+
:param filetype: Optional file type of the extract. If not provided, it is derived from the extension of the filename.
55+
:type filetype: string
4756
:param config_location: Optional location for the process_tracker configuration file.
4857
:type config_location: string
4958
"""
@@ -74,12 +83,54 @@ def __init__(
7483
else:
7584
raise Exception("A location object or location_path must be provided.")
7685

86+
if compression_type is not None:
87+
self.logger.info("Finding compression type.")
88+
try:
89+
self.compression_type = self.data_store.get_or_create_item(
90+
model=ExtractCompressionType,
91+
create=False,
92+
extract_compression_type=compression_type,
93+
)
94+
except Exception:
95+
error_msg = "%s is not a valid compression type." % compression_type
96+
self.logger.error(error_msg)
97+
raise Exception(error_msg)
98+
99+
self.compression_type_id = self.compression_type.extract_compression_type_id
100+
else:
101+
self.compression_type_id = None
102+
103+
if filetype is not None:
104+
self.logger.info("File type provided. Verifying it is a valid filetype.")
105+
try:
106+
self.filetype = self.data_store.get_or_create_item(
107+
model=ExtractFileType, create=False, extract_filetype=filetype
108+
)
109+
except Exception:
110+
error_msg = "%s is not a valid file type." % filetype
111+
self.logger.error(error_msg)
112+
raise Exception(error_msg)
113+
else:
114+
# Need to try to determine the filetype based on the extension of the filename.
115+
file_extension = os.path.splitext(filename)[1]
116+
file_extension = file_extension.replace(".", "")
117+
self.logger.info(
118+
"Trying to find record for file extension: %s" % file_extension
119+
)
120+
self.filetype = self.data_store.get_or_create_item(
121+
model=ExtractFileType,
122+
create=False,
123+
extract_filetype_code=file_extension,
124+
)
125+
77126
self.logger.info("Registering extract.")
78127

79128
self.extract = self.data_store.get_or_create_item(
80129
model=Extract,
81130
extract_filename=filename,
82131
extract_location_id=self.location.location.location_id,
132+
extract_compression_type_id=self.compression_type_id,
133+
extract_filetype_id=self.filetype.extract_filetype_id,
83134
)
84135

85136
if location_path is not None:

process_tracker/location_tracker.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# For processes dealing with Extract Locations.
33
import logging
44
from pathlib import PurePath
5-
from os.path import basename, dirname, join, isdir, normpath
65

76
from process_tracker.utilities.aws_utilities import AwsUtilities
87
from process_tracker.utilities.logging import console

process_tracker/models/extract.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,52 @@ def __repr__(self):
4141
)
4242

4343

44+
class ExtractCompressionType(Base):
45+
46+
__tablename__ = "extract_compression_type_lkup"
47+
__table_args__ = {"schema": "process_tracker"}
48+
49+
extract_compression_type_id = Column(
50+
Integer,
51+
Sequence(
52+
"extract_compression_type_extract_compression_type_id",
53+
schema="process_tracker",
54+
),
55+
primary_key=True,
56+
nullable=False,
57+
)
58+
extract_compression_type = Column(String(25), unique=True, nullable=False)
59+
60+
def __repr__(self):
61+
62+
return "<Extract Compression Type name=%s>" % (self.extract_compression_type)
63+
64+
65+
class ExtractFileType(Base):
66+
67+
__tablename__ = "extract_filetype_lkup"
68+
__table_args__ = {"schema": "process_tracker"}
69+
70+
extract_filetype_id = Column(
71+
Integer,
72+
Sequence("extract_filetype_extract_filetype_id_seq", schema="process_tracker"),
73+
primary_key=True,
74+
nullable=False,
75+
)
76+
extract_filetype_code = Column(String(5), nullable=False)
77+
extract_filetype = Column(String(75), nullable=False, unique=True)
78+
delimiter_char = Column(String(1), nullable=True)
79+
quote_char = Column(String(1), nullable=True)
80+
escape_char = Column(String(1), nullable=True)
81+
82+
def __repr__(self):
83+
84+
return "<Extract Filetype code=%s, name=%s>" % (
85+
self.extract_filetype_code,
86+
self.extract_filetype,
87+
)
88+
89+
4490
class Extract(Base):
4591

4692
__tablename__ = "extract_tracking"
@@ -70,6 +116,18 @@ class Extract(Base):
70116
extract_load_low_date_time = Column(DateTime, nullable=True)
71117
extract_load_high_date_time = Column(DateTime, nullable=True)
72118
extract_load_record_count = Column(Integer, nullable=True)
119+
extract_compression_type_id = Column(
120+
Integer,
121+
ForeignKey(
122+
"process_tracker.extract_compression_type_lkup.extract_compression_type_id"
123+
),
124+
nullable=True,
125+
)
126+
extract_filetype_id = Column(
127+
Integer,
128+
ForeignKey("process_tracker.extract_filetype_lkup.extract_filetype_id"),
129+
nullable=True,
130+
)
73131

74132
extract_dataset_types = relationship(
75133
"ExtractDatasetType",

tests/test_extract_tracker.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,3 +547,62 @@ def test_set_extract_record_count_invalid_type(self):
547547
return self.assertTrue(
548548
"blarg is not a valid audit_type." in str(context.exception)
549549
)
550+
551+
def test_set_compression_type(self):
552+
"""Testing that when an extract is created with a compression type, the type is associated correctly."""
553+
554+
extract = ExtractTracker(
555+
process_run=self.process_run,
556+
filename="Compression Type File.csv",
557+
location_name="Test Location",
558+
location_path="/home/test/extract_dir",
559+
compression_type="zip",
560+
)
561+
562+
given_result = extract.compression_type.extract_compression_type
563+
expected_result = "zip"
564+
565+
self.assertEqual(expected_result, given_result)
566+
567+
def test_set_compression_type_invalid(self):
568+
"""Testing that when an extract is created with an invalid compression type, an error is thrown."""
569+
570+
with self.assertRaises(Exception) as context:
571+
ExtractTracker(
572+
process_run=self.process_run,
573+
filename="Compression Type File.csv",
574+
location_name="Test Location",
575+
location_path="/home/test/extract_dir",
576+
compression_type="zap",
577+
)
578+
579+
self.assertTrue("zap is not a valid compression type" in str(context.exception))
580+
581+
def test_set_file_type(self):
582+
"""Testing that when an extract is created with a valid file type, the type is associated correctly."""
583+
extract = ExtractTracker(
584+
process_run=self.process_run,
585+
filename="Compression Type File.csv",
586+
location_name="Test Location",
587+
location_path="/home/test/extract_dir",
588+
filetype="Comma Separated Values",
589+
)
590+
591+
given_result = extract.filetype.extract_filetype_code
592+
expected_result = "csv"
593+
594+
self.assertEqual(expected_result, given_result)
595+
596+
def test_set_file_type_invalid(self):
597+
"""Testing that when an extract is created with an invalid file type, an error is thrown."""
598+
599+
with self.assertRaises(Exception) as context:
600+
ExtractTracker(
601+
process_run=self.process_run,
602+
filename="Compression Type File.csv",
603+
location_name="Test Location",
604+
location_path="/home/test/extract_dir",
605+
filetype="zap",
606+
)
607+
608+
self.assertTrue("zap is not a valid file type" in str(context.exception))

0 commit comments

Comments
 (0)