From 0e5e3753749558e2018d66b0042981e0b7833256 Mon Sep 17 00:00:00 2001 From: Tony Primerano Date: Mon, 15 Mar 2021 10:45:52 -0400 Subject: [PATCH 1/2] fix local client to honor DOWNLOAD_PATH prefix and adjusted package versions to work --- lexpredict_openedgar/openedgar/clients/local.py | 13 +++++++------ lexpredict_openedgar/openedgar/tasks.py | 2 +- lexpredict_openedgar/requirements/full.txt | 10 +++++----- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/lexpredict_openedgar/openedgar/clients/local.py b/lexpredict_openedgar/openedgar/clients/local.py index 2a7fa62..ffcf706 100644 --- a/lexpredict_openedgar/openedgar/clients/local.py +++ b/lexpredict_openedgar/openedgar/clients/local.py @@ -35,24 +35,25 @@ class LocalClient: - - def __init__(self): + prefix = '' + def __init__(self, prefix=''): + self.prefix=prefix logger.info("Initialized local client") def path_exists(self, path: str): - return os.path.exists(path) + return os.path.exists(os.path.join(self.prefix, path)) def put_buffer(self, file_path: str, buffer, write_bytes=True): - dir_name = os.path.dirname(file_path) + dir_name = os.path.dirname(os.path.join(self.prefix, file_path)) if not os.path.exists(dir_name): os.makedirs(dir_name) if write_bytes: mode="wb" else: mode="w" - with open(file_path, mode=mode) as localfile: + with open(os.path.join(self.prefix, file_path), mode=mode) as localfile: localfile.write(buffer) def get_buffer(self, file_path: str): - with open(file_path, mode='rb') as localfile: + with open(os.path.join(self.prefix, file_path), mode='rb') as localfile: return localfile.read() diff --git a/lexpredict_openedgar/openedgar/tasks.py b/lexpredict_openedgar/openedgar/tasks.py index e4b2380..3921b3c 100755 --- a/lexpredict_openedgar/openedgar/tasks.py +++ b/lexpredict_openedgar/openedgar/tasks.py @@ -203,7 +203,7 @@ def process_filing_index(client_type: str, file_path: str, filing_index_buffer: if client_type == "S3": client = S3Client() else: - client = LocalClient() + client = LocalClient(os.environ["DOWNLOAD_PATH"]) # Retrieve buffer if not passed if filing_index_buffer is None: diff --git a/lexpredict_openedgar/requirements/full.txt b/lexpredict_openedgar/requirements/full.txt index f34945d..a6b57ea 100755 --- a/lexpredict_openedgar/requirements/full.txt +++ b/lexpredict_openedgar/requirements/full.txt @@ -18,7 +18,7 @@ cffi==1.11.5 chardet==3.0.4 coverage==4.5.1 coveralls==1.3.0 -datefinder==0.6.1 +datefinder==0.7.1 decorator==4.3.0 defusedxml==0.5.0 Django==2.0.8 @@ -58,7 +58,7 @@ jupyter-core==4.4.0 kombu==3.0.37 lazy-object-proxy==1.3.1 lxml==4.1.1 -MarkupSafe==1.0 +MarkupSafe==1.1 mccabe==0.6.1 mistune==0.8.3 more-itertools==4.1.0 @@ -71,7 +71,7 @@ num2words==0.5.5 numpy==1.14.3 oauthlib==2.0.7 packaging==17.1 -pandas==0.22.0 +pandas==0.21.0 pandocfilters==1.4.2 parso==0.2.0 pexpect==4.5.0 @@ -107,7 +107,7 @@ qtconsole==4.3.1 redis==2.10.6 regex==2017.9.23 reporters-db==1.0.12.1 -requests==2.20.0 +requests==2.18.4 requests-oauthlib==0.8.0 s3transfer==0.1.13 scikit-learn==0.19.1 @@ -128,7 +128,7 @@ tornado==5.0.2 traitlets==4.3.2 typing==3.6.2 Unidecode==0.4.21 -urllib3==1.23 +urllib3==1.22 wcwidth==0.1.7 webencodings==0.5.1 Werkzeug==0.14.1 From 67530964143e538be0a5f62739acff1dd725fd54 Mon Sep 17 00:00:00 2001 From: Tony Primerano Date: Mon, 15 Mar 2021 11:07:13 -0400 Subject: [PATCH 2/2] use path prefix on LocalClient in edgar.py too --- lexpredict_openedgar/openedgar/processes/edgar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lexpredict_openedgar/openedgar/processes/edgar.py b/lexpredict_openedgar/openedgar/processes/edgar.py index fa4b082..a02f960 100755 --- a/lexpredict_openedgar/openedgar/processes/edgar.py +++ b/lexpredict_openedgar/openedgar/processes/edgar.py @@ -66,8 +66,8 @@ def download_filing_index_data(year: int = None): # Create S3 client download_client = S3Client() else: - download_client = LocalClient() path_prefix = os.environ["DOWNLOAD_PATH"] + download_client = LocalClient(path_prefix) # Now iterate through list to check if already on S3 for filing_index_path in filing_index_list: