Skip to content

Commit 06ff156

Browse files
committed
process_tracker_python-17 Audit Info for Extracts
✨ low and high dates, number of records can now be tracked on Extracts, both at write and at load. Extracts now can have their data's low and high dates as well as number of records tracked both when the file is written and when the file is loading. All audit fields are optional. Added table fields to sql scripts as well. Closes #17
1 parent b5ef4fe commit 06ff156

File tree

12 files changed

+419
-49
lines changed

12 files changed

+419
-49
lines changed

dbscripts/mysql_process_tracker.sql

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -51,30 +51,33 @@ create table location_lkup
5151
foreign key (location_type) references location_type_lkup (location_type_id)
5252
);
5353

54-
create table extract_tracking
54+
create table process_tracker.extract_tracking
5555
(
5656
extract_id int auto_increment
5757
primary key,
5858
extract_filename varchar(750) not null,
5959
extract_location_id int null,
6060
extract_status_id int null,
6161
extract_registration_date_time datetime not null,
62+
extract_write_low_date_time datetime null comment 'The lowest datetime of the data set as noted when writing the data file.',
63+
extract_write_high_date_time datetime null comment 'The highest datetime of the data set as noted when writing the data file.',
64+
extract_write_record_count int null comment 'The record count of the data set as noted when writing the data file.',
65+
extract_load_low_date_time datetime null comment 'The lowest datetime of the data set as noted when loading the data file. Should match the extract_write_low_date_time.',
66+
extract_load_high_date_time datetime null comment 'The highest datetime of the data set as noted when loading the data file. Should match the extract_load_high_date_time.',
67+
extract_load_record_count int null comment 'The record count of the data set when loading the data file.',
6268
constraint extract_filename
6369
unique (extract_filename),
6470
constraint extract_tracking_ibfk_1
65-
foreign key (extract_location_id) references location_lkup (location_id),
71+
foreign key (extract_location_id) references process_tracker.location_lkup (location_id),
6672
constraint extract_tracking_ibfk_2
67-
foreign key (extract_status_id) references extract_status_lkup (extract_status_id)
73+
foreign key (extract_status_id) references process_tracker.extract_status_lkup (extract_status_id)
6874
);
6975

7076
create index extract_location_id
71-
on extract_tracking (extract_location_id);
77+
on process_tracker.extract_tracking (extract_location_id);
7278

7379
create index extract_status_id
74-
on extract_tracking (extract_status_id);
75-
76-
create index location_type
77-
on location_lkup (location_type);
80+
on process_tracker.extract_tracking (extract_status_id);
7881

7982
create table process_tracker.extract_dependency
8083
(

dbscripts/postgresql_process_tracker.sql

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -283,37 +283,57 @@ create unique index location_lkup_udx01
283283
create unique index location_lkup_udx02
284284
on location_lkup (location_path);
285285

286-
create table extract_tracking
286+
create table process_tracking.extract_tracking
287287
(
288288
extract_id serial not null
289289
constraint extract_tracking_pk
290290
primary key,
291291
extract_filename varchar(750) not null,
292292
extract_location_id integer not null
293293
constraint extract_tracking_fk01
294-
references location_lkup,
294+
references process_tracking.location_lkup,
295295
extract_process_run_id integer
296296
constraint extract_tracking_fk03
297-
references process_tracking,
297+
references process_tracking.process_tracking,
298298
extract_status_id integer
299299
constraint extract_tracking_fk02
300-
references extract_status_lkup,
301-
extract_registration_date_time timestamp not null
300+
references process_tracking.extract_status_lkup,
301+
extract_registration_date_time timestamp not null,
302+
extract_write_low_date_time timestamp,
303+
extract_write_high_date_time timestamp,
304+
extract_write_record_count integer,
305+
extract_load_low_date_time timestamp,
306+
extract_load_high_date_time timestamp,
307+
extract_load_record_count integer
302308
);
303309

304-
comment on table extract_tracking is 'Tracking table for all extract/staging data files.';
310+
comment on table process_tracking.extract_tracking is 'Tracking table for all extract/staging data files.';
311+
312+
comment on column process_tracking.extract_tracking.extract_filename is 'The unique filename for a given extract from a given source.';
313+
314+
comment on column process_tracking.extract_tracking.extract_location_id is 'The location where the given extract can be found.';
315+
316+
comment on column process_tracking.extract_tracking.extract_process_run_id is 'The process that registered or created the extract file.';
317+
318+
comment on column process_tracking.extract_tracking.extract_status_id is 'The status of the extract.';
319+
320+
comment on column process_tracking.extract_tracking.extract_registration_date_time is 'The datetime that the extract was loaded into extract tracking.';
321+
322+
comment on column process_tracking.extract_tracking.extract_write_low_date_time is 'The lowest datetime of the data set as noted when writing the data file.';
323+
324+
comment on column process_tracking.extract_tracking.extract_write_high_date_time is 'The highest datetime of the data set as noted when writing the data file.';
325+
326+
comment on column process_tracking.extract_tracking.extract_write_record_count is 'The record count of the data set as noted when writing the data file.';
305327

306-
comment on column extract_tracking.extract_filename is 'The unique filename for a given extract from a given source.';
328+
comment on column process_tracking.extract_tracking.extract_load_low_date_time is 'The lowest datetime of the data set as noted when loading the data file. Should match the extract_write_low_date_time.';
307329

308-
comment on column extract_tracking.extract_location_id is 'The location where the given extract can be found.';
330+
comment on column process_tracking.extract_tracking.extract_load_high_date_time is 'The highest datetime of the data set as noted when loading the data file.';
309331

310-
comment on column extract_tracking.extract_process_run_id is 'The process that registered or created the extract file.';
332+
comment on column process_tracking.extract_tracking.extract_load_record_count is 'The record count of the data set when loading the data file.';
311333

312-
comment on column extract_tracking.extract_status_id is 'The status of the extract.';
334+
alter table process_tracking.extract_tracking owner to pt_admin;
313335

314-
comment on column extract_tracking.extract_registration_date_time is 'The datetime that the extract was loaded into extract tracking.';
315336

316-
alter table extract_tracking owner to pt_admin;
317337

318338
create table extract_process_tracking
319339
(

process_tracker/extract_tracker.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from process_tracker.data_store import DataStore
1010
from process_tracker.location_tracker import LocationTracker
1111
from process_tracker.utilities.settings import SettingsManager
12+
from process_tracker.utilities import utilities
1213
from process_tracker.models.extract import (
1314
Extract,
1415
ExtractDependency,
@@ -252,3 +253,75 @@ def retrieve_extract_process(self):
252253
self.session.commit()
253254

254255
return extract_process
256+
257+
def set_extract_low_high_dates(self, low_date, high_date, audit_type="load"):
258+
"""
259+
For the given extract, find the low and high date_times while writing or loading.
260+
:param low_date: The low date of the data set.
261+
:type low_date: Datetime/timestamp
262+
:param high_date: The high date of the data set.
263+
:type high_date: Datetime/timestamp.
264+
:param audit_type: The type of audit fields being populated. Valid types: write, load
265+
:type audittype: String
266+
:return:
267+
"""
268+
269+
if audit_type == "write":
270+
271+
previous_low_date_time = self.extract.extract_write_low_date_time
272+
previous_high_date_time = self.extract.extract_write_high_date_time
273+
274+
if utilities.determine_low_high_date(
275+
date=low_date, previous_date=previous_low_date_time, date_type="low"
276+
):
277+
self.extract.extract_write_low_date_time = low_date
278+
279+
if utilities.determine_low_high_date(
280+
date=high_date, previous_date=previous_high_date_time, date_type="high"
281+
):
282+
self.extract.extract_write_high_date_time = high_date
283+
284+
elif audit_type == "load":
285+
286+
previous_low_date_time = self.extract.extract_load_low_date_time
287+
previous_high_date_time = self.extract.extract_load_high_date_time
288+
289+
if utilities.determine_low_high_date(
290+
date=low_date, previous_date=previous_low_date_time, date_type="low"
291+
):
292+
self.extract.extract_load_low_date_time = low_date
293+
294+
if utilities.determine_low_high_date(
295+
date=high_date, previous_date=previous_high_date_time, date_type="high"
296+
):
297+
self.extract.extract_load_high_date_time = high_date
298+
299+
else:
300+
self.logger.error("%s is not a valid audit_type." % audit_type)
301+
raise Exception("%s is not a valid audit_type." % audit_type)
302+
303+
self.session.commit()
304+
305+
def set_extract_record_count(self, num_records, audit_type="load"):
306+
"""
307+
For the given audit type, set the number of records for the given extract.
308+
:param num_records: Number of records tracked in extract
309+
:type num_records: int
310+
:param audit_type: The type of audit being populated. Valid types: write, load.
311+
:type audit_type: str
312+
:return:
313+
"""
314+
315+
if audit_type == "write":
316+
317+
self.extract.extract_write_record_count = num_records
318+
319+
elif audit_type == "load":
320+
321+
self.extract.extract_load_record_count = num_records
322+
323+
else:
324+
self.logger.error("%s is not a valid audit_type." % audit_type)
325+
raise Exception("%s is not a valid audit_type." % audit_type)
326+
327+
self.session.commit()

process_tracker/models/extract.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ class Extract(Base):
4444
extract_registration_date_time = Column(
4545
DateTime, nullable=False, default=datetime.now()
4646
)
47+
extract_write_low_date_time = Column(DateTime, nullable=True)
48+
extract_write_high_date_time = Column(DateTime, nullable=True)
49+
extract_write_record_count = Column(Integer, nullable=True)
50+
extract_load_low_date_time = Column(DateTime, nullable=True)
51+
extract_load_high_date_time = Column(DateTime, nullable=True)
52+
extract_load_record_count = Column(Integer, nullable=True)
4753

4854
extract_process = relationship("ExtractProcess", back_populates="process_extracts")
4955
locations = relationship("Location", foreign_keys=[extract_location_id])

process_tracker/process_tracker.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from process_tracker.location_tracker import LocationTracker
1313
from process_tracker.utilities.logging import console
1414
from process_tracker.utilities.settings import SettingsManager
15+
from process_tracker.utilities import utilities
1516

1617
from process_tracker.models.actor import Actor
1718
from process_tracker.models.extract import (
@@ -446,14 +447,16 @@ def set_process_run_low_high_dates(self, low_date=None, high_date=None):
446447
previous_low_date_time = self.process_tracking_run.process_run_low_date_time
447448
previous_high_date_time = self.process_tracking_run.process_run_low_date_time
448449

449-
if low_date is not None and (
450-
previous_low_date_time is None or low_date < previous_low_date_time
450+
if utilities.determine_low_high_date(
451+
date=low_date, previous_date=previous_low_date_time, date_type="low"
451452
):
453+
452454
self.process_tracking_run.process_run_low_date_time = low_date
453455

454-
if high_date is not None and (
455-
previous_high_date_time is None or high_date > previous_high_date_time
456+
if utilities.determine_low_high_date(
457+
date=high_date, previous_date=previous_high_date_time, date_type="high"
456458
):
459+
457460
self.process_tracking_run.process_run_high_date_time = high_date
458461

459462
self.session.commit()

process_tracker/utilities/settings.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@ def __init__(self, config_location=None):
2323
)
2424

2525
else:
26-
self.config_path = os.path.join(
27-
config_location, "process_tracker_config.ini"
26+
self.config_path = config_location
27+
self.config_file = os.path.join(
28+
self.config_path, "process_tracker_config.ini"
2829
)
2930

3031
exists = os.path.isfile(self.config_file)
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
"""
2+
Space for generalized helpers that can be utilized across the entire framework.
3+
"""
4+
import logging
5+
6+
from process_tracker.utilities.settings import SettingsManager
7+
8+
config = SettingsManager().config
9+
10+
logger = logging.getLogger(__name__)
11+
logger.setLevel(config["DEFAULT"]["log_level"])
12+
13+
14+
def determine_low_high_date(date, previous_date, date_type):
15+
"""
16+
For the given dates and date type, determine if the date replaces the previous date or not.
17+
:param date: The new datetime.
18+
:type date: Datetime/timestamp
19+
:param previous_date: The previous datetime that date is being compared to.
20+
:type previous_date: Datetime/timestamp
21+
:param date_type: Is the comparison for a low date or high date? Valid values: low, high
22+
:type date_type: str
23+
:return: Boolean if date replaces previous_date
24+
"""
25+
26+
if date_type == "low":
27+
28+
if date is not None and (previous_date is None or date < previous_date):
29+
return True
30+
else:
31+
return False
32+
33+
elif date_type == "high":
34+
35+
if date is not None and (previous_date is None or previous_date > date):
36+
return True
37+
else:
38+
return False
39+
40+
else:
41+
logger.error("%s is not a valid date_type." % date_type)
42+
raise Exception("%s is not a valid date_type." % date_type)
43+
44+
45+
def timestamp_converter(data_store_type, timestamp):
46+
"""
47+
Helper function for when testing with data stores that have funky formats for stock dates with SQLAlchemy.
48+
:param data_store_type: The type of data store
49+
:param timestamp: The timestamp to be created.
50+
:return:
51+
"""
52+
53+
if data_store_type == "mysql":
54+
timestamp = timestamp.replace(microsecond=0)
55+
else:
56+
timestamp = timestamp
57+
58+
return timestamp

0 commit comments

Comments
 (0)