Skip to content

Commit d0859f3

Browse files
authored
Merge pull request #59 from OpenDataAlex/process_tracker_python-55
process_tracker_python-55 Read config file from s3
2 parents 3954089 + 8ceeafe commit d0859f3

File tree

7 files changed

+544
-13
lines changed

7 files changed

+544
-13
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ ProcessTracker - Python
33
Data integration process management made easy!
44

55
[![Coverage Status](https://coveralls.io/repos/github/OpenDataAlex/process_tracker_python/badge.svg?branch=master)](https://coveralls.io/github/OpenDataAlex/process_tracker_python?branch=master)
6+
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/5600be1990974a4688a4fa4852edb5d5)](https://www.codacy.com/app/alexmeadows_2718/process_tracker_python?utm_source=github.com&utm_medium=referral&utm_content=OpenDataAlex/process_tracker_python&utm_campaign=Badge_Grade)
67
[![Build Status](https://travis-ci.org/OpenDataAlex/process_tracker_python.svg?branch=master)](https://travis-ci.org/OpenDataAlex/process_tracker_python)
78
[![Downloads](https://pepy.tech/badge/processtracker)](https://pepy.tech/project/processtracker)
89
[![PyPI version](https://badge.fury.io/py/processtracker.svg)](https://badge.fury.io/py/processtracker)
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# AWS Utilities
2+
# Utilities for working with AWS services
3+
4+
import logging
5+
6+
import boto3
7+
from botocore.errorfactory import ClientError
8+
9+
10+
class AwsUtilities:
    """
    Helpers for parsing s3 locations and talking to s3 through boto3.

    Accepted path formats are ``s3://bucket/key`` and http(s) URLs on an
    ``amazonaws.com`` s3 endpoint, either virtual-hosted style
    (``https://bucket.s3.amazonaws.com/key``) or path style
    (``https://s3.amazonaws.com/bucket/key``).
    """

    _S3_SCHEME = "s3://"
    _HTTP_SCHEMES = ("http://", "https://")
    _AWS_DOMAIN = ".amazonaws.com"

    def __init__(self):
        """Create the logger (forced to DEBUG) and the boto3 s3 resource."""
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel("DEBUG")

        self.s3 = boto3.resource("s3")

    @classmethod
    def _strip_http_scheme(cls, path):
        """Return path without its leading http(s):// scheme, or None if it has none."""
        for scheme in cls._HTTP_SCHEMES:
            if path.startswith(scheme):
                return path[len(scheme):]

        return None

    @staticmethod
    def _bucket_from_host(host):
        """Return the bucket encoded in a virtual-hosted host, or None for a path-style host."""
        # The bucket is everything before the LAST ".s3." / ".s3-" label, so
        # bucket names that contain dots (or even "s3") survive intact.
        cut = max(host.rfind(".s3."), host.rfind(".s3-"))

        if cut > 0:
            return host[:cut]

        # Hosts such as s3.amazonaws.com or s3-us-west-2.amazonaws.com carry
        # no bucket; it is the first segment of the URL path instead.
        if host.startswith(("s3.", "s3-")):
            return None

        # Unrecognised host layout: keep the historical "first label" guess.
        return host.split(".")[0]

    def _raise_invalid(self, message, path):
        """Log and raise the module's generic invalid-path error."""
        error_msg = "%s %s" % (message, path)

        self.logger.error(error_msg)
        raise Exception(error_msg)

    def determine_bucket_name(self, path):
        """
        For the given path, return the bucket name, if path is a valid s3 URL.
        :param path: Valid s3 URL.  Can be in s3:// or http(s):// format.
        :type path: str
        :return: Name of the bucket the path points at.
        :raises Exception: If the path is not an s3 path, is an amazonaws.com
                           URL without an http(s):// scheme, or holds no
                           bucket name.
        """
        if not self.determine_valid_s3_path(path=path):
            self._raise_invalid("It appears the URL is not a valid s3 path.", path)

        self.logger.debug("Parsing %s", path)

        if self._S3_SCHEME in path:
            # Everything after the scheme is "<bucket>/<key>".
            remainder = path.partition(self._S3_SCHEME)[2]

            self.logger.debug("Path is now %s", remainder)

            bucket_name = remainder.split("/")[0]

        else:
            remainder = self._strip_http_scheme(path)

            if remainder is None:
                self._raise_invalid("It appears the URL is not valid.", path)

            self.logger.debug("Path is now %s", remainder)

            host, _, object_path = remainder.partition("/")
            bucket_name = self._bucket_from_host(host)

            if bucket_name is None:
                # Path-style URL: the bucket is the first path segment.
                bucket_name = object_path.split("/")[0]

        if not bucket_name:
            self._raise_invalid("It appears the URL is not valid.", path)

        self.logger.debug("Bucket name is %s", bucket_name)

        return bucket_name

    def determine_file_key(self, path):
        """
        Determine the key of the s3 file based on the filepath provided.
        :param path: Full s3 filepath.  Can be in s3:// or http(s):// format.
        :type path: str
        :return: Object key, i.e. the path without scheme, host and bucket.
                 Empty string when the path ends right after the bucket's
                 trailing slash.
        :raises Exception: If the path is not an s3 path or has no key part.
        """

        if self._S3_SCHEME in path:
            remainder = path.partition(self._S3_SCHEME)[2]
            _, separator, key = remainder.partition("/")

        elif "s3" in path and self._AWS_DOMAIN in path:
            # partition (not split) so a key that itself contains
            # ".amazonaws.com/" is returned whole.
            prefix, separator, key = path.partition(self._AWS_DOMAIN + "/")

            host = prefix.split("://")[-1] + self._AWS_DOMAIN

            if separator and self._bucket_from_host(host) is None:
                # Path-style URL: drop the leading bucket segment.
                _, separator, key = key.partition("/")

        else:
            self._raise_invalid("It appears the URL is not valid.", path)

        if not separator:
            # Previously surfaced as a bare IndexError.
            self._raise_invalid("No file key found in path.", path)

        return key

    def determine_s3_file_exists(self, path):
        """
        Determine if a file exists on s3 based on given path.
        :param path: Full s3 filepath.  Can be in s3:// or http(s):// format.
        :type path: str
        :return: True if the object's metadata could be loaded, False if the
                 request failed with a ClientError.
        """
        self.logger.info("Determining if %s exists.", path)

        bucket_name = self.determine_bucket_name(path=path)
        key = self.determine_file_key(path=path)

        try:
            self.s3.Object(bucket_name, key).load()

        except ClientError:
            # NOTE(review): every ClientError is reported as "does not
            # exist", not only a 404 - confirm that is intended.
            self.logger.error("File %s does not exist in s3.", path)

            return False

        return True

    def determine_valid_s3_path(self, path):
        """
        Take the provided path and determine if valid s3 URL.
        :param path: Full s3 filepath.  Can be in s3:// or http(s):// format.
        :type path: str
        :return: True if the path contains ``s3://``, or contains both ``s3``
                 and ``.amazonaws.com``; False otherwise.
        """
        self.logger.debug("Validating %s", path)

        if self._S3_SCHEME in path:
            self.logger.debug("s3:// in path.")
            return True

        if "s3" in path and self._AWS_DOMAIN in path:
            self.logger.debug("s3 and .amazonaws.com in path")
            return True

        self.logger.error("Path is invalid.")
        return False

    def get_s3_bucket(self, bucket_name):
        """
        Return the boto3 Bucket resource for the given bucket name.
        :param bucket_name: Name of the s3 bucket.
        :type bucket_name: str
        :return: ``self.s3.Bucket(bucket_name)``
        """

        return self.s3.Bucket(bucket_name)

    def read_from_s3(self, bucket_name, filename):
        """
        With a given bucket and filename, read from s3.
        :param bucket_name: Name of the s3 bucket.
        :param filename: Name of the file within the bucket.
        :return: None.
        """
        # NOTE(review): only the bucket is resolved here; nothing is read or
        # returned yet, and filename is unused - looks unfinished, confirm.
        bucket = self.get_s3_bucket(bucket_name=bucket_name)

process_tracker/utilities/settings.py

Lines changed: 64 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
# Settings manager and configuration, both for initialization and reading.
22

33
import configparser
4+
import logging
45
import os
56
from pathlib import Path
7+
import tempfile
8+
9+
from process_tracker.utilities.aws_utilities import AwsUtilities
610

711

812
class SettingsManager:
@@ -15,24 +19,57 @@ def __init__(self, config_location=None):
1519

1620
self.config = configparser.ConfigParser(allow_no_value=True)
1721

22+
self.logger = logging.getLogger(__name__)
23+
self.logger.setLevel("DEBUG")
24+
25+
self.aws_utils = AwsUtilities()
26+
27+
exists = False
28+
1829
if config_location is None:
19-
home = str(Path.home())
20-
self.config_path = os.path.join(home, ".process_tracker/")
21-
self.config_file = os.path.join(
22-
self.config_path, "process_tracker_config.ini"
30+
home = Path.home()
31+
32+
self.config_path = str(home.joinpath(".process_tracker/"))
33+
self.config_file = str(
34+
Path(self.config_path).joinpath("process_tracker_config.ini")
2335
)
2436

37+
exists = os.path.isfile(self.config_file)
38+
2539
else:
2640
self.config_path = config_location
27-
self.config_file = os.path.join(
28-
self.config_path, "process_tracker_config.ini"
29-
)
3041

31-
exists = os.path.isfile(self.config_file)
42+
if "process_tracker_config.ini" not in self.config_path:
43+
self.logger.debug(
44+
"process_tracker_config.ini not present. Appending to %s"
45+
% self.config_path
46+
)
47+
48+
self.config_file = self.config_path
49+
50+
if not self.config_file.endswith("/"):
51+
self.config_file += "/"
52+
53+
self.config_file += "process_tracker_config.ini"
54+
55+
self.logger.debug("Config file is now %s" % self.config_file)
56+
else:
57+
self.logger.debug(
58+
"process_tracker_config.ini present. Setting config_path to config_file."
59+
)
60+
self.config_file = self.config_path
61+
62+
if self.aws_utils.determine_valid_s3_path(
63+
path=self.config_path
64+
) and self.aws_utils.determine_s3_file_exists(path=self.config_file):
65+
66+
exists = True
3267

3368
if exists:
3469
self.read_config_file()
3570
else:
71+
# How to handle if exists is false and it's s3?
72+
3673
self.create_config_file()
3774

3875
def create_config_file(self):
@@ -62,4 +99,22 @@ def read_config_file(self):
6299
:return:
63100
"""
64101

65-
return self.config.read(self.config_file)
102+
if self.aws_utils.determine_valid_s3_path(
103+
path=self.config_path
104+
) and self.aws_utils.determine_s3_file_exists(path=self.config_file):
105+
106+
temp_file = tempfile.NamedTemporaryFile()
107+
bucket_name = self.aws_utils.determine_bucket_name(path=self.config_path)
108+
109+
bucket = self.aws_utils.get_s3_bucket(bucket_name=bucket_name)
110+
key = self.aws_utils.determine_file_key(path=self.config_file)
111+
112+
bucket.download_file(key, temp_file.name)
113+
114+
with open(temp_file.name, "r") as f:
115+
self.config.readfp(f)
116+
temp_file.close()
117+
118+
else:
119+
120+
return self.config.read(self.config_file)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[DEFAULT]
2+
log_level = DEBUG
3+
data_store_type = postgresql
4+
data_store_username = pt_admin_test
5+
data_store_password = arglebargle
6+
data_store_host = localhost
7+
data_store_port = 1234
8+
data_store_name = process_tracker

tests/test_process_tracker.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Tests for validating process_tracking works as expected.
22

33
from datetime import datetime, timedelta
4+
import logging
45
import os
56
from pathlib import Path
67
import time
@@ -29,7 +30,7 @@
2930
ProcessTargetObject,
3031
ProcessTracking,
3132
)
32-
from process_tracker.models.source import Source, SourceObject
33+
from process_tracker.models.source import Source
3334

3435
from process_tracker.data_store import DataStore
3536
from process_tracker.extract_tracker import ExtractTracker
@@ -43,6 +44,8 @@
4344
class TestProcessTracker(unittest.TestCase):
4445
@classmethod
4546
def setUpClass(cls):
47+
cls.logger = logging.Logger(__name__)
48+
4649
cls.data_store = DataStore()
4750
cls.session = cls.data_store.session
4851
cls.data_store_type = cls.data_store.data_store_type
@@ -696,6 +699,10 @@ def test_register_extracts_by_location_local(self):
696699

697700
self.assertCountEqual(expected_result, given_result)
698701

702+
@unittest.skipIf(
703+
"TRAVIS" in os.environ and os.environ["TRAVIS"] == "true",
704+
"Skipping this test on Travis CI.",
705+
)
699706
@mock_s3
700707
def test_register_extracts_by_location_s3(self):
701708
"""
@@ -740,9 +747,9 @@ def test_register_extracts_by_location_s3(self):
740747

741748
key = os.path.join(test_bucket, file)
742749

743-
print(file)
744-
print(key)
745-
print(fixtures_dir)
750+
self.logger.debug("Filename %s" % file)
751+
self.logger.debug("File key %s" % key)
752+
self.logger.debug("Fixtures dir %s" % fixtures_dir)
746753

747754
file = os.path.join(fixtures_dir, file)
748755
client.upload_file(Filename=file, Bucket=test_bucket, Key=key)

0 commit comments

Comments
 (0)