Skip to content

Commit eeb54d3

Browse files
authored
Merge pull request #36 from OpenDataAlex/process_tracker_python-17
Process tracker python 17
2 parents b5ef4fe + 5bb2bc8 commit eeb54d3

File tree

12 files changed

+406
-38
lines changed

12 files changed

+406
-38
lines changed

dbscripts/mysql_process_tracker.sql

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -51,30 +51,33 @@ create table location_lkup
5151
foreign key (location_type) references location_type_lkup (location_type_id)
5252
);
5353

54-
create table extract_tracking
54+
create table process_tracker.extract_tracking
5555
(
5656
extract_id int auto_increment
5757
primary key,
5858
extract_filename varchar(750) not null,
5959
extract_location_id int null,
6060
extract_status_id int null,
6161
extract_registration_date_time datetime not null,
62+
extract_write_low_date_time datetime null comment 'The lowest datetime of the data set as noted when writing the data file.',
63+
extract_write_high_date_time datetime null comment 'The highest datetime of the data set as noted when writing the data file.',
64+
extract_write_record_count int null comment 'The record count of the data set as noted when writing the data file.',
65+
extract_load_low_date_time datetime null comment 'The lowest datetime of the data set as noted when loading the data file. Should match the extract_write_low_date_time.',
66+
extract_load_high_date_time datetime null comment 'The highest datetime of the data set as noted when loading the data file. Should match the extract_load_high_date_time.',
67+
extract_load_record_count int null comment 'The record count of the data set when loading the data file.',
6268
constraint extract_filename
6369
unique (extract_filename),
6470
constraint extract_tracking_ibfk_1
65-
foreign key (extract_location_id) references location_lkup (location_id),
71+
foreign key (extract_location_id) references process_tracker.location_lkup (location_id),
6672
constraint extract_tracking_ibfk_2
67-
foreign key (extract_status_id) references extract_status_lkup (extract_status_id)
73+
foreign key (extract_status_id) references process_tracker.extract_status_lkup (extract_status_id)
6874
);
6975

7076
create index extract_location_id
71-
on extract_tracking (extract_location_id);
77+
on process_tracker.extract_tracking (extract_location_id);
7278

7379
create index extract_status_id
74-
on extract_tracking (extract_status_id);
75-
76-
create index location_type
77-
on location_lkup (location_type);
80+
on process_tracker.extract_tracking (extract_status_id);
7881

7982
create table process_tracker.extract_dependency
8083
(

dbscripts/postgresql_process_tracker.sql

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,13 @@ create table extract_tracking
298298
extract_status_id integer
299299
constraint extract_tracking_fk02
300300
references extract_status_lkup,
301-
extract_registration_date_time timestamp not null
301+
extract_registration_date_time timestamp not null,
302+
extract_write_low_date_time timestamp,
303+
extract_write_high_date_time timestamp,
304+
extract_write_record_count integer,
305+
extract_load_low_date_time timestamp,
306+
extract_load_high_date_time timestamp,
307+
extract_load_record_count integer
302308
);
303309

304310
comment on table extract_tracking is 'Tracking table for all extract/staging data files.';
@@ -313,6 +319,18 @@ comment on column extract_tracking.extract_status_id is 'The status of the extra
313319

314320
comment on column extract_tracking.extract_registration_date_time is 'The datetime that the extract was loaded into extract tracking.';
315321

322+
comment on column extract_tracking.extract_write_low_date_time is 'The lowest datetime of the data set as noted when writing the data file.';
323+
324+
comment on column extract_tracking.extract_write_high_date_time is 'The highest datetime of the data set as noted when writing the data file.';
325+
326+
comment on column extract_tracking.extract_write_record_count is 'The record count of the data set as noted when writing the data file.';
327+
328+
comment on column extract_tracking.extract_load_low_date_time is 'The lowest datetime of the data set as noted when loading the data file. Should match the extract_write_low_date_time.';
329+
330+
comment on column extract_tracking.extract_load_high_date_time is 'The highest datetime of the data set as noted when loading the data file.';
331+
332+
comment on column extract_tracking.extract_load_record_count is 'The record count of the data set when loading the data file.';
333+
316334
alter table extract_tracking owner to pt_admin;
317335

318336
create table extract_process_tracking

process_tracker/extract_tracker.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from process_tracker.data_store import DataStore
1010
from process_tracker.location_tracker import LocationTracker
1111
from process_tracker.utilities.settings import SettingsManager
12+
from process_tracker.utilities import utilities
1213
from process_tracker.models.extract import (
1314
Extract,
1415
ExtractDependency,
@@ -252,3 +253,75 @@ def retrieve_extract_process(self):
252253
self.session.commit()
253254

254255
return extract_process
256+
257+
def set_extract_low_high_dates(self, low_date, high_date, audit_type="load"):
258+
"""
259+
For the given extract, find the low and high date_times while writing or loading.
260+
:param low_date: The low date of the data set.
261+
:type low_date: Datetime/timestamp
262+
:param high_date: The high date of the data set.
263+
:type high_date: Datetime/timestamp.
264+
:param audit_type: The type of audit fields being populated. Valid types: write, load
265+
:type audittype: String
266+
:return:
267+
"""
268+
269+
if audit_type == "write":
270+
271+
previous_low_date_time = self.extract.extract_write_low_date_time
272+
previous_high_date_time = self.extract.extract_write_high_date_time
273+
274+
if utilities.determine_low_high_date(
275+
date=low_date, previous_date=previous_low_date_time, date_type="low"
276+
):
277+
self.extract.extract_write_low_date_time = low_date
278+
279+
if utilities.determine_low_high_date(
280+
date=high_date, previous_date=previous_high_date_time, date_type="high"
281+
):
282+
self.extract.extract_write_high_date_time = high_date
283+
284+
elif audit_type == "load":
285+
286+
previous_low_date_time = self.extract.extract_load_low_date_time
287+
previous_high_date_time = self.extract.extract_load_high_date_time
288+
289+
if utilities.determine_low_high_date(
290+
date=low_date, previous_date=previous_low_date_time, date_type="low"
291+
):
292+
self.extract.extract_load_low_date_time = low_date
293+
294+
if utilities.determine_low_high_date(
295+
date=high_date, previous_date=previous_high_date_time, date_type="high"
296+
):
297+
self.extract.extract_load_high_date_time = high_date
298+
299+
else:
300+
self.logger.error("%s is not a valid audit_type." % audit_type)
301+
raise Exception("%s is not a valid audit_type." % audit_type)
302+
303+
self.session.commit()
304+
305+
def set_extract_record_count(self, num_records, audit_type="load"):
306+
"""
307+
For the given audit type, set the number of records for the given extract.
308+
:param num_records: Number of records tracked in extract
309+
:type num_records: int
310+
:param audit_type: The type of audit being populated. Valid types: write, load.
311+
:type audit_type: str
312+
:return:
313+
"""
314+
315+
if audit_type == "write":
316+
317+
self.extract.extract_write_record_count = num_records
318+
319+
elif audit_type == "load":
320+
321+
self.extract.extract_load_record_count = num_records
322+
323+
else:
324+
self.logger.error("%s is not a valid audit_type." % audit_type)
325+
raise Exception("%s is not a valid audit_type." % audit_type)
326+
327+
self.session.commit()

process_tracker/models/extract.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ class Extract(Base):
4444
extract_registration_date_time = Column(
4545
DateTime, nullable=False, default=datetime.now()
4646
)
47+
extract_write_low_date_time = Column(DateTime, nullable=True)
48+
extract_write_high_date_time = Column(DateTime, nullable=True)
49+
extract_write_record_count = Column(Integer, nullable=True)
50+
extract_load_low_date_time = Column(DateTime, nullable=True)
51+
extract_load_high_date_time = Column(DateTime, nullable=True)
52+
extract_load_record_count = Column(Integer, nullable=True)
4753

4854
extract_process = relationship("ExtractProcess", back_populates="process_extracts")
4955
locations = relationship("Location", foreign_keys=[extract_location_id])

process_tracker/process_tracker.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from process_tracker.location_tracker import LocationTracker
1313
from process_tracker.utilities.logging import console
1414
from process_tracker.utilities.settings import SettingsManager
15+
from process_tracker.utilities import utilities
1516

1617
from process_tracker.models.actor import Actor
1718
from process_tracker.models.extract import (
@@ -446,14 +447,16 @@ def set_process_run_low_high_dates(self, low_date=None, high_date=None):
446447
previous_low_date_time = self.process_tracking_run.process_run_low_date_time
447448
previous_high_date_time = self.process_tracking_run.process_run_low_date_time
448449

449-
if low_date is not None and (
450-
previous_low_date_time is None or low_date < previous_low_date_time
450+
if utilities.determine_low_high_date(
451+
date=low_date, previous_date=previous_low_date_time, date_type="low"
451452
):
453+
452454
self.process_tracking_run.process_run_low_date_time = low_date
453455

454-
if high_date is not None and (
455-
previous_high_date_time is None or high_date > previous_high_date_time
456+
if utilities.determine_low_high_date(
457+
date=high_date, previous_date=previous_high_date_time, date_type="high"
456458
):
459+
457460
self.process_tracking_run.process_run_high_date_time = high_date
458461

459462
self.session.commit()

process_tracker/utilities/settings.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@ def __init__(self, config_location=None):
2323
)
2424

2525
else:
26-
self.config_path = os.path.join(
27-
config_location, "process_tracker_config.ini"
26+
self.config_path = config_location
27+
self.config_file = os.path.join(
28+
self.config_path, "process_tracker_config.ini"
2829
)
2930

3031
exists = os.path.isfile(self.config_file)
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
"""
2+
Space for generalized helpers that can be utilized across the entire framework.
3+
"""
4+
import logging
5+
6+
from process_tracker.utilities.settings import SettingsManager
7+
8+
config = SettingsManager().config
9+
10+
logger = logging.getLogger(__name__)
11+
logger.setLevel(config["DEFAULT"]["log_level"])
12+
13+
14+
def determine_low_high_date(date, previous_date, date_type):
15+
"""
16+
For the given dates and date type, determine if the date replaces the previous date or not.
17+
:param date: The new datetime.
18+
:type date: Datetime/timestamp
19+
:param previous_date: The previous datetime that date is being compared to.
20+
:type previous_date: Datetime/timestamp
21+
:param date_type: Is the comparison for a low date or high date? Valid values: low, high
22+
:type date_type: str
23+
:return: Boolean if date replaces previous_date
24+
"""
25+
26+
if date_type == "low":
27+
28+
if date is not None and (previous_date is None or date < previous_date):
29+
return True
30+
else:
31+
return False
32+
33+
elif date_type == "high":
34+
35+
if date is not None and (previous_date is None or previous_date > date):
36+
return True
37+
else:
38+
return False
39+
40+
else:
41+
logger.error("%s is not a valid date_type." % date_type)
42+
raise Exception("%s is not a valid date_type." % date_type)
43+
44+
45+
def timestamp_converter(data_store_type, timestamp):
46+
"""
47+
Helper function for when testing with data stores that have funky formats for stock dates with SQLAlchemy.
48+
:param data_store_type: The type of data store
49+
:param timestamp: The timestamp to be created.
50+
:return:
51+
"""
52+
53+
if data_store_type == "mysql":
54+
timestamp = timestamp.replace(microsecond=0)
55+
else:
56+
timestamp = timestamp
57+
58+
return timestamp

0 commit comments

Comments
 (0)