Skip to content

Commit 3b9b95d

Browse files
authored
add try/except and error messages to file downloads (#173)
1 parent 3a431fd commit 3b9b95d

File tree

1 file changed

+27
-7
lines changed

1 file changed

+27
-7
lines changed

worker/cp-worker.py

Lines changed: 27 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import boto3
2+
import botocore
23
import json
34
import logging
45
import os
@@ -190,8 +191,14 @@ def runCellProfiler(message):
190191
subfolders = '/'.join((csv_insubfolders)[:-1])
191192
if not os.path.exists(os.path.join(localIn,subfolders)):
192193
os.makedirs(os.path.join(localIn,subfolders), exist_ok=True)
193-
s3 = boto3.resource('s3')
194-
s3.meta.client.download_file(WORKSPACE_BUCKET, message['data_file'], data_file_path)
194+
s3client=boto3.client('s3')
195+
try:
196+
s3client.download_file(WORKSPACE_BUCKET, message['data_file'], data_file_path)
197+
except botocore.exceptions.ClientError:
198+
printandlog(f"Can't find load data file in S3. Looking for {message['data_file']} in {WORKSPACE_BUCKET}",logger)
199+
printandlog("Aborting. Can't run without load data.",logger)
200+
logger.removeHandler(watchtowerlogger)
201+
return 'DOWNLOAD_PROBLEM'
195202
if message['data_file'][-4:]=='.csv':
196203
printandlog('Figuring which files to download', logger)
197204
import pandas
@@ -226,8 +233,11 @@ def runCellProfiler(message):
226233
os.makedirs(os.path.split(new_file_name)[0])
227234
printandlog(f'Made directory {os.path.split(new_file_name)[0]}',logger)
228235
if not os.path.exists(new_file_name):
229-
s3.meta.client.download_file(SOURCE_BUCKET,prefix_on_bucket,new_file_name)
230-
downloaded_files.append(new_file_name)
236+
try:
237+
s3client.download_file(SOURCE_BUCKET,prefix_on_bucket,new_file_name)
238+
downloaded_files.append(new_file_name)
239+
except botocore.exceptions.ClientError:
240+
printandlog(f"Can't find file in S3. Looking for {prefix_on_bucket} in {SOURCE_BUCKET}",logger)
231241
printandlog(f'Downloaded {str(len(downloaded_files))} files',logger)
232242
import random
233243
newtag = False
@@ -255,15 +265,25 @@ def runCellProfiler(message):
255265
os.makedirs(os.path.split(new_file_name)[0])
256266
printandlog(f'made directory {os.path.split(new_file_name)[0]}',logger)
257267
if not os.path.exists(new_file_name):
258-
s3.meta.client.download_file(SOURCE_BUCKET,prefix_on_bucket,new_file_name)
259-
downloaded_files.append(new_file_name)
268+
try:
269+
s3client.download_file(SOURCE_BUCKET,prefix_on_bucket,new_file_name)
270+
downloaded_files.append(new_file_name)
271+
except botocore.exceptions.ClientError:
272+
printandlog(f"Can't find file in S3. Looking for {prefix_on_bucket} in {SOURCE_BUCKET}",logger)
260273
printandlog(f'Downloaded {str(len(downloaded_files))} files',logger)
261274
else:
262275
printandlog("Couldn't parse data file for file download. Not supported input of .csv or .txt",logger)
263276
# Download pipeline and update pipeline path in message
264277
printandlog(f"Downloading {message['pipeline']} from {WORKSPACE_BUCKET}", logger)
265278
pipepath = os.path.join(localIn, message['pipeline'].split('/')[-1])
266-
s3.meta.client.download_file(WORKSPACE_BUCKET, message['pipeline'], pipepath)
279+
try:
280+
s3client.download_file(WORKSPACE_BUCKET, message['pipeline'], pipepath)
281+
except botocore.exceptions.ClientError:
282+
printandlog(f"Can't find pipeline in S3. Looking for {message['pipeline']} in {WORKSPACE_BUCKET}",logger)
283+
printandlog("Aborting. Can't run without pipeline.",logger)
284+
logger.removeHandler(watchtowerlogger)
285+
return 'DOWNLOAD_PROBLEM'
286+
267287
else:
268288
data_file_path = os.path.join(DATA_ROOT,message['data_file'])
269289
pipepath = os.path.join(DATA_ROOT,message["pipeline"])

0 commit comments

Comments
 (0)