Skip to content

Commit 3fc7d8f

Browse files
committed
Reconfiguring code to prepare for packaging
👷 Travis-CI and Coveralls working 🏗️ Moved data_store initialization to DataStore. Verifying that the build stack is working, sans package deploy to PyPI. Initial configuration for PyPI is in the Travis config, but disabled. This commit is the first step toward getting the package ready for deploy. Major refactoring had to be done to ensure that code is only run when called, as opposed to on import. Closes: #5
1 parent e981d79 commit 3fc7d8f

File tree

15 files changed

+212
-180
lines changed

15 files changed

+212
-180
lines changed

.travis.yml

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,27 @@
11
dist: xenial
22
language: python
33
python:
4-
- "3.7"
4+
- '3.7'
55
install:
6-
- "make"
6+
- make
77
services:
8-
- postgresql
8+
- postgresql
99
script:
10-
- make test
10+
- make test
1111
after_success:
12-
- coveralls
12+
- coveralls
1313
before_script:
14-
- psql -f dbscripts/postgresql_account_create.sql -U postgres
15-
- psql -f dbscripts/postgresql_process_tracker.sql process_tracking -U postgres
14+
- psql -f dbscripts/postgresql_account_create.sql -U postgres
15+
- psql -f dbscripts/postgresql_process_tracker.sql process_tracking -U postgres
1616
env:
17-
- process_tracking_data_store_host=localhost process_tracking_data_store_port=5432 process_tracking_data_store_name=process_tracking process_tracking_data_store_type=postgresql process_tracking_data_store_password=Testing1! process_tracking_data_store_username=pt_admin
17+
- process_tracking_data_store_host=localhost process_tracking_data_store_port=5432
18+
process_tracking_data_store_name=process_tracking process_tracking_data_store_type=postgresql
19+
process_tracking_data_store_password=Testing1! process_tracking_data_store_username=pt_admin
20+
#deploy:
21+
# matrix:
22+
# - provider: pypi
23+
# - user: opendataalex
24+
# - password:
25+
# secure: jYe8ygGYuJox5f1vQvraIDNlR2Yk/+/afIXdsxaxRi/QN135rIHxRih24iYv8ZQewfW+w4CveEPz2pr0ew5NG8hm2MD5hQZI/A2QE4BC53Tr3xMAWSdPQgcI1rX4Ieu6BHTS3jet212yLuBk41GdASKztz+s/CVvVqT0A29U1b85OVzoVHz6OTgaM1m0pKeJE6Ho66r5O6J7ClZfEY4RJ8jROmrT7ufW3hEs/VQXwfAUWcjy6l5H5VYTLa/g94MgQ+0WmgcdUbB8s3c2q7n4RiaVEowZOLtBmRElDIPiLJC8JGAIFKcf5W8pzXK/ZhZhY3ppgmBvRVeyPdiAwjysionzl4vIlNxC28acI3HzvJbAdvwPsuMD+pnTo/LKRqqvGey2NSnMPmzGXxzqVUqQ5Wa1Yzdcqg8mbWHFRw9icMWKM84CnDBvqkmJcfnj5R4X6Rc77mr9jc5LMMBBblNtY7dQc2AooframujGA2mP1xABQ9vOSg89MdnlhHrgEK6m+Pz0S2hhlsGRw9yMF9L4Q1ie7xKeE49L3HKZSjKEgSMv9y9RbqnmW32F0UoKgWkCJE8DV9wbZe5Htn+lpwmXI0vu+cxnqsiZvG8wjm+klJGn5F84OFTN3zQOC/kIBXQXvutZYtgF0vofdCmKQzK9istl16Eur1r4NCbov3oM97o=
26+
# on:
27+
# tags: true

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ ProcessTracker - Python
33
Data integration process management made easy!
44

55
[![Coverage Status](https://coveralls.io/repos/github/OpenDataAlex/process_tracker_python/badge.svg?branch=master)](https://coveralls.io/github/OpenDataAlex/process_tracker_python?branch=master)
6+
[![Build Status](https://travis-ci.org/OpenDataAlex/process_tracker_python.svg?branch=master)](https://travis-ci.org/OpenDataAlex/process_tracker_python)
67

78
This is the Python implementation of the ProcessTracker framework. ProcessTracker builds a standard framework that is
89
tool agnostic. If you are working with data integration/cleansing processes within Python (i.e. using PySpark, Pandas, etc.)

process_tracker/__init__.py

Lines changed: 0 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +0,0 @@
1-
import os
2-
import logging
3-
4-
from sqlalchemy import create_engine
5-
from sqlalchemy.orm import sessionmaker
6-
7-
log = logging.getLogger(__name__)
8-
9-
"""
10-
Based on environment variables, create the data store connection engine.
11-
:return:
12-
"""
13-
14-
data_store_type = os.environ.get('process_tracking_data_store_type')
15-
data_store_username = os.environ.get('process_tracking_data_store_username')
16-
data_store_password = os.environ.get('process_tracking_data_store_password')
17-
data_store_host = os.environ.get('process_tracking_data_store_host')
18-
data_store_port = os.environ.get('process_tracking_data_store_port')
19-
data_store_name = os.environ.get('process_tracking_data_store_name')
20-
21-
data_store_error_flag = False
22-
23-
if data_store_type is None:
24-
raise Exception('Data store type is not set.')
25-
data_store_error_flag = True
26-
27-
if data_store_username is None:
28-
raise Exception('Data store username is not set.')
29-
data_store_error_flag = True
30-
31-
if data_store_password is None:
32-
raise Exception('Data store password is not set')
33-
data_store_error_flag = True
34-
35-
if data_store_host is None:
36-
raise Exception('Data store host is not set')
37-
data_store_error_flag = True
38-
39-
if data_store_port is None:
40-
raise Exception('Data store port is not set')
41-
data_store_error_flag = True
42-
43-
if data_store_name is None:
44-
raise Exception('Data store name is not set')
45-
data_store_error_flag = True
46-
47-
if data_store_error_flag:
48-
raise Exception('Data store has not been properly configured. Please read how to set up the Process '
49-
'Tracking data store by going to: <insert read the docs url here>')
50-
51-
relational_stores = ['postgresql']
52-
nonrelational_stores = []
53-
54-
supported_data_stores = relational_stores + nonrelational_stores
55-
56-
if data_store_type in supported_data_stores:
57-
58-
if data_store_type in relational_stores:
59-
if data_store_type == 'postgresql':
60-
engine = create_engine(data_store_type + '://' + data_store_username + ':' + data_store_password
61-
+ '@' + data_store_host + '/' + data_store_name)
62-
63-
log.info("Attempting to connect to datastore %s, found at %s:%s" % (data_store_name
64-
, data_store_host
65-
, data_store_port))
66-
67-
Session = sessionmaker(bind=engine)
68-
69-
session = Session(expire_on_commit=False)
70-
session.execute("SET search_path TO %s" % data_store_name)
71-
72-
elif data_store_type in nonrelational_stores:
73-
Session = ''
74-
75-
76-
else:
77-
raise Exception('Invalid data store type provided. Please use: ' + ", ".join(supported_data_stores))
78-
exit()

process_tracker/data_store.py

Lines changed: 103 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,26 @@
1-
from process_tracker import session
1+
import os
2+
import logging
3+
4+
from sqlalchemy import create_engine
5+
from sqlalchemy.orm import sessionmaker
26

37

48
class DataStore:
59

6-
@staticmethod
7-
def get_or_create(model, create=True, **kwargs):
10+
def __init__(self):
    """
    Open the data store connection and keep its details on the instance.

    Calls verify_and_connect_to_data_store() and copies the session, plus the data store's type, host,
    port and name, from the returned dictionary onto this instance.
    """
    self.logger = logging.getLogger(__name__)

    connection = self.verify_and_connect_to_data_store()

    self.session = connection['session']

    # The remaining attributes share their names with the keys of the returned dictionary.
    for detail in ('data_store_type', 'data_store_host', 'data_store_port', 'data_store_name'):
        setattr(self, detail, connection[detail])
22+
23+
def get_or_create(self, model, create=True, **kwargs):
824
"""
925
Testing if an entity instance exists or not. If does, return entity key. If not, create entity instance
1026
and return key.
@@ -16,16 +32,97 @@ def get_or_create(model, create=True, **kwargs):
1632
:return:
1733
"""
1834

19-
instance = session.query(model).filter_by(**kwargs).first()
35+
instance = self.session.query(model).filter_by(**kwargs).first()
2036

2137
if not instance:
2238

2339
if create:
2440
instance = model(**kwargs)
25-
session.add(instance)
26-
session.commit()
41+
self.session.add(instance)
42+
self.session.commit()
2743

2844
else:
2945
raise Exception('There is no record match in %s .' % model.__tablename__)
3046

3147
return instance
48+
49+
def verify_and_connect_to_data_store(self):
    """
    Based on environment variables, create the data store connection engine.

    Reads the process_tracking_data_store_* environment variables (type, username, password, host, port
    and name), verifies that each one is set and, for a supported data store type, builds the SQLAlchemy
    engine and opens a session on it.

    :return: Dictionary with the keys 'session', 'data_store_type', 'data_store_host', 'data_store_port',
             'data_store_name', 'data_store_username' and 'data_store_password'.
    :rtype: dict
    :raises Exception: If any required environment variable is missing, or if the configured data store
                       type is not supported.
    """
    # (environment variable suffix, message raised when the variable is missing), in the order checked.
    required_settings = (
        ('type', 'Data store type is not set.'),
        ('username', 'Data store username is not set.'),
        ('password', 'Data store password is not set'),
        ('host', 'Data store host is not set'),
        ('port', 'Data store port is not set'),
        ('name', 'Data store name is not set'),
    )

    settings = {}

    for suffix, missing_message in required_settings:
        value = os.environ.get('process_tracking_data_store_' + suffix)

        if value is None:
            # Fail on the first missing setting; there is nothing to connect to without it.
            raise Exception(missing_message)

        settings[suffix] = value

    data_store_type = settings['type']
    data_store_username = settings['username']
    data_store_password = settings['password']
    data_store_host = settings['host']
    data_store_port = settings['port']
    data_store_name = settings['name']

    relational_stores = ['postgresql']
    nonrelational_stores = []

    supported_data_stores = relational_stores + nonrelational_stores

    if data_store_type not in supported_data_stores:
        raise Exception('Invalid data store type provided. Please use: ' + ", ".join(supported_data_stores))

    # Stays None for any supported store type that does not have a session implementation yet.
    session = None

    if data_store_type == 'postgresql':
        # The port is part of the URL so that a non-default port is actually honoured.
        engine = create_engine(data_store_type + '://' + data_store_username + ':' + data_store_password
                               + '@' + data_store_host + ':' + data_store_port + '/' + data_store_name)

        self.logger.info("Attempting to connect to datastore %s, found at %s:%s"
                         , data_store_name
                         , data_store_host
                         , data_store_port)

        Session = sessionmaker(bind=engine)

        session = Session(expire_on_commit=False)
        # NOTE(review): newer SQLAlchemy releases may require wrapping raw SQL strings in text() - confirm
        # against the pinned version.
        session.execute("SET search_path TO %s" % data_store_name)

    return {
        'session': session,
        'data_store_type': data_store_type,
        'data_store_host': data_store_host,
        'data_store_port': data_store_port,
        'data_store_name': data_store_name,
        'data_store_username': data_store_username,
        'data_store_password': data_store_password,
    }

process_tracker/extract_tracker.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33
from datetime import datetime
44
from os.path import basename, normpath
55

6-
from process_tracker import session
76
from process_tracker.data_store import DataStore
87

9-
from models.extract import Extract, ExtractProcess, ExtractStatus, Location
8+
from process_tracker.models.extract import Extract, ExtractProcess, ExtractStatus, Location
109

1110

1211
class ExtractTracker:
@@ -26,6 +25,7 @@ def __init__(self, process_run, filename, location_path, location_name=None):
2625
:type location_name: string
2726
"""
2827
self.data_store = DataStore()
28+
self.session = self.data_store.session
2929
self.process_run = process_run
3030

3131
if location_name is None:
@@ -59,7 +59,7 @@ def __init__(self, process_run, filename, location_path, location_name=None):
5959
self.extract_process = self.retrieve_extract_process()
6060

6161
self.extract.extract_status_id = self.extract_status_initializing
62-
session.commit()
62+
self.session.commit()
6363

6464
def change_extract_status(self, new_status):
6565
"""
@@ -75,7 +75,7 @@ def change_extract_status(self, new_status):
7575
self.extract_process.extract_process_status_id = new_status
7676
self.extract_process.extract_process_event_date_time = status_date
7777

78-
session.commit()
78+
self.session.commit()
7979

8080
else:
8181
raise Exception('%s is not a valid extract status type. '
@@ -109,15 +109,14 @@ def derive_location_name(location_path):
109109

110110
return location_name
111111

112-
@staticmethod
113-
def get_extract_status_types():
112+
def get_extract_status_types(self):
114113
"""
115114
Get list of process status types and return dictionary.
116115
:return:
117116
"""
118117
status_types = {}
119118

120-
for record in session.query(ExtractStatus):
119+
for record in self.session.query(ExtractStatus):
121120
status_types[record.extract_status_name] = record.extract_status_id
122121

123122
return status_types
@@ -137,6 +136,6 @@ def retrieve_extract_process(self):
137136
if extract_process.extract_process_status_id is None:
138137

139138
extract_process.extract_process_status_id = self.extract_status_initializing
140-
session.commit()
139+
self.session.commit()
141140

142141
return extract_process
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22
# Models for Actor entities
33

44
from sqlalchemy import Column, Integer, Sequence, String
5-
from sqlalchemy_utils.types import uuid
6-
from models.model_base import Base
5+
from process_tracker.models.model_base import Base
76

87

98
class Actor(Base):
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from sqlalchemy import Column, DateTime, ForeignKey, Integer, Sequence, String
77
from sqlalchemy.orm import relationship
88

9-
from models.model_base import Base
9+
from process_tracker.models.model_base import Base
1010

1111

1212
class ExtractStatus(Base):
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Integer, Sequence, String
66
from sqlalchemy.orm import relationship
77

8-
from models.model_base import default_date, Base
9-
from models.extract import Extract
8+
from process_tracker.models.model_base import default_date, Base
109

1110

1211
class ErrorType(Base):

0 commit comments

Comments
 (0)