Skip to content

Commit 8ceeafe

Browse files
committed
process_tracker_python-55 Read config file from s3
✨ Settings files should now be able to be read from an s3 location. Added the ability for settings config files to be read from s3. This will be helpful when wiring process_tracker into Lambda or Glue functions. Closes #55
1 parent 3954089 commit 8ceeafe

File tree

7 files changed

+544
-13
lines changed

7 files changed

+544
-13
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ ProcessTracker - Python
33
Data integration process management made easy!
44

55
[![Coverage Status](https://coveralls.io/repos/github/OpenDataAlex/process_tracker_python/badge.svg?branch=master)](https://coveralls.io/github/OpenDataAlex/process_tracker_python?branch=master)
6+
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/5600be1990974a4688a4fa4852edb5d5)](https://www.codacy.com/app/alexmeadows_2718/process_tracker_python?utm_source=github.com&utm_medium=referral&utm_content=OpenDataAlex/process_tracker_python&utm_campaign=Badge_Grade)
67
[![Build Status](https://travis-ci.org/OpenDataAlex/process_tracker_python.svg?branch=master)](https://travis-ci.org/OpenDataAlex/process_tracker_python)
78
[![Downloads](https://pepy.tech/badge/processtracker)](https://pepy.tech/project/processtracker)
89
[![PyPI version](https://badge.fury.io/py/processtracker.svg)](https://badge.fury.io/py/processtracker)
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# AWS Utilities
2+
# Utilities for working with AWS services
3+
4+
import logging
5+
6+
import boto3
7+
from botocore.errorfactory import ClientError
8+
9+
10+
class AwsUtilities:
    """
    Helpers for locating files stored in AWS s3.

    Paths can be supplied either as ``s3://bucket/key`` or as an http(s) URL on
    an ``amazonaws.com`` s3 endpoint. For http(s) URLs both the virtual-hosted
    form (``https://bucket.s3.amazonaws.com/key``) and the path-style form
    (``https://s3.amazonaws.com/bucket/key``) are understood.
    """

    def __init__(self):

        self.logger = logging.getLogger(__name__)
        self.logger.setLevel("DEBUG")

        self.s3 = boto3.resource("s3")

    @staticmethod
    def _strip_http_scheme(path):
        """
        Remove a leading http:// or https:// from the path.
        :param path: Candidate http(s) URL.
        :type path: str
        :return: The path without its scheme, or None if it has neither scheme.
        """
        for scheme in ("http://", "https://"):
            if path.startswith(scheme):
                return path[len(scheme) :]

        return None

    @staticmethod
    def _split_http_location(location):
        """
        Split a scheme-less s3 URL (``host/rest``) into bucket name and key.
        :param location: URL with the http(s):// prefix already removed.
        :type location: str
        :return: Tuple of (bucket name, key). Key is None when the URL holds no
                 key portion at all.
        """
        host, slash, object_path = location.partition("/")

        # Virtual-hosted style: everything in front of the last ".s3." / ".s3-"
        # label is the bucket, which keeps bucket names containing dots intact.
        marker = max(host.rfind(".s3."), host.rfind(".s3-"))

        if marker > 0:
            bucket_name = host[:marker]

        elif host.startswith(("s3.", "s3-")):
            # Path style: the host is the bare s3 endpoint, so the bucket is the
            # first path segment and the key is whatever follows it.
            bucket_name, slash, object_path = object_path.partition("/")

        else:
            # Unrecognised host layout; fall back to the first host label.
            bucket_name = host.split(".")[0]

        return bucket_name, (object_path if slash else None)

    def determine_bucket_name(self, path):
        """
        For the given path, return the bucket name, if path is a valid s3 URL.
        :param path: Full s3 filepath. Can be in s3:// or http(s):// format.
        :type path: str
        :return: Name of the bucket the path points to.
        :raises Exception: If the path is not an s3 path, or is an amazonaws.com
                           URL that does not start with http:// or https://.
        """
        if not self.determine_valid_s3_path(path=path):
            error_msg = "It appears the URL is not a valid s3 path. %s" % path

            self.logger.error(error_msg)
            raise Exception(error_msg)

        self.logger.debug("Parsing %s", path)

        if "s3://" in path:
            # Only strip the scheme when it really is a prefix.
            if path.startswith("s3://"):
                path = path[len("s3://") :]

            self.logger.debug("Path is now %s", path)

            bucket_name = path.split("/")[0]

        else:
            # determine_valid_s3_path only lets s3:// and amazonaws.com paths
            # through, so this is the http(s) form.
            location = self._strip_http_scheme(path)

            if location is None:
                error_msg = "It appears the URL is not valid. %s" % path

                self.logger.error(error_msg)
                raise Exception(error_msg)

            self.logger.debug("Path is now %s", location)

            bucket_name, _ = self._split_http_location(location)

        self.logger.debug("Bucket name is %s", bucket_name)

        return bucket_name

    def determine_file_key(self, path):
        """
        Determine the key of the s3 file based on the filepath provided.
        :param path: Full s3 filepath. Can be in s3:// or http(s):// format.
        :type path: str
        :return: Key of the file within its bucket.
        :raises Exception: If the path is neither an s3:// nor an amazonaws.com
                           s3 path.
        :raises IndexError: If the path contains no key portion.
        """

        if "s3://" in path:
            # "s3://bucket/key" splits into ["s3:", "", "bucket", "key"].
            groups = path.split("/", 3)

            key = groups[3] if len(groups) > 3 else None

        elif "s3" in path and ".amazonaws.com" in path:
            location = self._strip_http_scheme(path)

            if location is None:
                # No http(s) scheme: keep accepting the URL by taking whatever
                # follows the amazonaws.com host.
                _, found, tail = path.partition(".amazonaws.com/")
                key = tail if found else None
            else:
                _, key = self._split_http_location(location)

        else:
            error_msg = "It appears the URL is not valid. %s" % path

            self.logger.error(error_msg)
            raise Exception(error_msg)

        if key is None:
            # Stays an IndexError so handlers written against the previous
            # behaviour keep working, but now says what is wrong.
            error_msg = "Unable to determine the file key. No key found in %s" % path

            self.logger.error(error_msg)
            raise IndexError(error_msg)

        return key

    def determine_s3_file_exists(self, path):
        """
        Determine if a file exists on s3 based on given path.
        :param path: Full s3 filepath. Can be in s3:// or http(s):// format.
        :type path: str
        :return: True if the object could be loaded, False otherwise.
        """
        self.logger.info("Determining if %s exists.", path)

        bucket_name = self.determine_bucket_name(path=path)

        key = self.determine_file_key(path=path)

        try:
            self.s3.Object(bucket_name, key).load()

        except ClientError:
            # NOTE: every ClientError is reported as a missing file, not only
            # "not found" responses.
            error_msg = "File %s does not exist in s3." % path
            self.logger.error(error_msg)

            return False

        return True

    def determine_valid_s3_path(self, path):
        """
        Take the provided path and determine if valid s3 URL.
        :param path: Full s3 filepath. Can be in s3:// or http(s):// format.
        :type path: str
        :return: True if the path looks like an s3 location, False otherwise.
        """
        self.logger.debug("Validating %s", path)

        if "s3://" in path:
            self.logger.debug("s3:// in path.")
            return True

        if "s3" in path and ".amazonaws.com" in path:
            self.logger.debug("s3 and .amazonaws.com in path")
            return True

        self.logger.error("Path is invalid.")
        return False

    def get_s3_bucket(self, bucket_name):
        """
        Return the s3 Bucket resource for the given bucket name.
        :param bucket_name: Name of the bucket.
        :return: Result of ``self.s3.Bucket(bucket_name)``.
        """

        return self.s3.Bucket(bucket_name)

    def read_from_s3(self, bucket_name, filename):
        """
        With a given bucket and filename, read from s3.
        :param bucket_name: Name of the bucket to read from.
        :param filename: Name of the file within the bucket.
        :return: None.
        """
        # NOTE(review): only the bucket is looked up; filename is not used yet
        # and nothing is returned. Looks unfinished - confirm intended behaviour.
        bucket = self.get_s3_bucket(bucket_name=bucket_name)

process_tracker/utilities/settings.py

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
# Settings manager and configuration, both for initialization and reading.
22

33
import configparser
4+
import logging
45
import os
56
from pathlib import Path
7+
import tempfile
8+
9+
from process_tracker.utilities.aws_utilities import AwsUtilities
610

711

812
class SettingsManager:
@@ -15,24 +19,57 @@ def __init__(self, config_location=None):
1519

1620
self.config = configparser.ConfigParser(allow_no_value=True)
1721

22+
self.logger = logging.getLogger(__name__)
23+
self.logger.setLevel("DEBUG")
24+
25+
self.aws_utils = AwsUtilities()
26+
27+
exists = False
28+
1829
if config_location is None:
19-
home = str(Path.home())
20-
self.config_path = os.path.join(home, ".process_tracker/")
21-
self.config_file = os.path.join(
22-
self.config_path, "process_tracker_config.ini"
30+
home = Path.home()
31+
32+
self.config_path = str(home.joinpath(".process_tracker/"))
33+
self.config_file = str(
34+
Path(self.config_path).joinpath("process_tracker_config.ini")
2335
)
2436

37+
exists = os.path.isfile(self.config_file)
38+
2539
else:
2640
self.config_path = config_location
27-
self.config_file = os.path.join(
28-
self.config_path, "process_tracker_config.ini"
29-
)
3041

31-
exists = os.path.isfile(self.config_file)
42+
if "process_tracker_config.ini" not in self.config_path:
43+
self.logger.debug(
44+
"process_tracker_config.ini not present. Appending to %s"
45+
% self.config_path
46+
)
47+
48+
self.config_file = self.config_path
49+
50+
if not self.config_file.endswith("/"):
51+
self.config_file += "/"
52+
53+
self.config_file += "process_tracker_config.ini"
54+
55+
self.logger.debug("Config file is now %s" % self.config_file)
56+
else:
57+
self.logger.debug(
58+
"process_tracker_config.ini present. Setting config_path to config_file."
59+
)
60+
self.config_file = self.config_path
61+
62+
if self.aws_utils.determine_valid_s3_path(
63+
path=self.config_path
64+
) and self.aws_utils.determine_s3_file_exists(path=self.config_file):
65+
66+
exists = True
3267

3368
if exists:
3469
self.read_config_file()
3570
else:
71+
# How to handle if exists is false and it's s3?
72+
3673
self.create_config_file()
3774

3875
def create_config_file(self):
@@ -62,4 +99,22 @@ def read_config_file(self):
6299
:return:
63100
"""
64101

65-
return self.config.read(self.config_file)
102+
if self.aws_utils.determine_valid_s3_path(
103+
path=self.config_path
104+
) and self.aws_utils.determine_s3_file_exists(path=self.config_file):
105+
106+
temp_file = tempfile.NamedTemporaryFile()
107+
bucket_name = self.aws_utils.determine_bucket_name(path=self.config_path)
108+
109+
bucket = self.aws_utils.get_s3_bucket(bucket_name=bucket_name)
110+
key = self.aws_utils.determine_file_key(path=self.config_file)
111+
112+
bucket.download_file(key, temp_file.name)
113+
114+
with open(temp_file.name, "r") as f:
115+
self.config.readfp(f)
116+
temp_file.close()
117+
118+
else:
119+
120+
return self.config.read(self.config_file)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[DEFAULT]
2+
log_level = DEBUG
3+
data_store_type = postgresql
4+
data_store_username = pt_admin_test
5+
data_store_password = arglebargle
6+
data_store_host = localhost
7+
data_store_port = 1234
8+
data_store_name = process_tracker

tests/test_process_tracker.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Tests for validating process_tracking works as expected.
22

33
from datetime import datetime, timedelta
4+
import logging
45
import os
56
from pathlib import Path
67
import time
@@ -29,7 +30,7 @@
2930
ProcessTargetObject,
3031
ProcessTracking,
3132
)
32-
from process_tracker.models.source import Source, SourceObject
33+
from process_tracker.models.source import Source
3334

3435
from process_tracker.data_store import DataStore
3536
from process_tracker.extract_tracker import ExtractTracker
@@ -43,6 +44,8 @@
4344
class TestProcessTracker(unittest.TestCase):
4445
@classmethod
4546
def setUpClass(cls):
47+
cls.logger = logging.Logger(__name__)
48+
4649
cls.data_store = DataStore()
4750
cls.session = cls.data_store.session
4851
cls.data_store_type = cls.data_store.data_store_type
@@ -696,6 +699,10 @@ def test_register_extracts_by_location_local(self):
696699

697700
self.assertCountEqual(expected_result, given_result)
698701

702+
@unittest.skipIf(
703+
"TRAVIS" in os.environ and os.environ["TRAVIS"] == "true",
704+
"Skipping this test on Travis CI.",
705+
)
699706
@mock_s3
700707
def test_register_extracts_by_location_s3(self):
701708
"""
@@ -740,9 +747,9 @@ def test_register_extracts_by_location_s3(self):
740747

741748
key = os.path.join(test_bucket, file)
742749

743-
print(file)
744-
print(key)
745-
print(fixtures_dir)
750+
self.logger.debug("Filename %s" % file)
751+
self.logger.debug("File key %s" % key)
752+
self.logger.debug("Fixtures dir %s" % fixtures_dir)
746753

747754
file = os.path.join(fixtures_dir, file)
748755
client.upload_file(Filename=file, Bucket=test_bucket, Key=key)

0 commit comments

Comments
 (0)