11# Extract Tracking
22# Used in the creation and editing of extract records. Used in conjunction with process tracking.
33from datetime import datetime
4+ import itertools
45import logging
56import os
67from pathlib import Path
@@ -43,6 +44,7 @@ def __init__(
4344 filetype = None ,
4445 config_location = None ,
4546 extract_id = None ,
47+ file_size = None ,
4648 ):
4749 """
4850 ExtractTracker is the primary engine for tracking data extracts
@@ -68,6 +70,8 @@ def __init__(
6870 :param extract_id: If trying to work with a specific extract that's in process, provide the id and it will be
6971 reconstructed.
7072 :type extract_id: int
73+ :param file_size: The size of the file (i.e. 2.21MB)
74+ :type file_size: str
7175 """
7276 log_level = SettingsManager (
7377 config_location = config_location
@@ -114,6 +118,18 @@ def __init__(
114118 self .extract_process = self .retrieve_extract_process ()
115119 self .sources = self .extract .extract_sources
116120
121+ if (
122+ self .extract .extract_filesize is not None
123+ and self .extract .extract_filesize_type is not None
124+ ):
125+
126+ self .file_size = "%s %s" % (
127+ self .extract .extract_filesize ,
128+ self .extract .extract_filesize_type ,
129+ )
130+ else :
131+ self .file_size = None
132+
117133 else :
118134 if filename is None :
119135 error_msg = "Filename must be provided."
@@ -230,6 +246,14 @@ def __init__(
230246 self .logger .info ("Status was not provided. Initializing." )
231247 self .extract .extract_status_id = self .extract_status_initializing
232248
249+ if file_size is not None :
250+ split_filesize = self .file_size_splitter (file_size = file_size )
251+
252+ self .extract .extract_filesize = split_filesize [0 ]
253+ self .extract .extract_filesize_type = split_filesize [1 ]
254+
255+ self .file_size = file_size
256+
233257 self .session .commit ()
234258
235259 def add_dependency (self , dependency_type , dependency ):
@@ -382,6 +406,47 @@ def extract_dependency_check(self, extracts=None):
382406 else :
383407 return False
384408
def file_size_splitter(self, file_size):
    """
    Split a human-readable file size into its numeric amount and its unit of measure.

    The amount may be whole or fractional ("2GB", "2.21MB") and may be separated from the measure by
    whitespace ("2.21 MB").  A missing measure, "b", or "bytes" (any letter case) means the amount is a
    byte count; byte counts are converted to megabytes so only "MB" and "GB" are ever returned.

    :param file_size: The provided file size with measure (e.g. 2.2GB).  A bare number is treated as bytes.
    :type file_size: str
    :return: Dictionary where key 0 is the amount (int for whole-number MB/GB input, float otherwise) and
             key 1 is the normalized measure ("MB" or "GB").
    :rtype: dict
    :raises ValueError: If the provided file size does not start with a valid number.
    :raises Exception: If the measure is anything other than bytes, MB, or GB.
    """
    bytes_per_mb = 1048576

    # str() lets a plain integer byte count through; strip() tolerates padded input.
    size_text = str(file_size).strip()

    # Keep the decimal point with the digits so "2.21MB" is read as 2.21 rather than truncated to 2.
    amount_text = "".join(
        itertools.takewhile(lambda char: char.isdigit() or char == ".", size_text)
    )
    self.logger.debug("Amount is now: %s" % amount_text)

    # Everything after the number is the measure; strip so "2.21 MB" and "2.21MB" are equivalent.
    measure = size_text[len(amount_text):].strip()
    self.logger.debug("Measure is: %s" % measure)

    try:
        # Whole numbers stay ints (as before); only fractional input becomes a float.
        amount = float(amount_text) if "." in amount_text else int(amount_text)
    except ValueError:
        error_msg = (
            "Unable to determine file size amount. Please provide a number followed by bytes, MB, or GB. "
            "File size provided was: %s" % file_size
        )
        self.logger.error(error_msg)
        raise ValueError(error_msg) from None
    self.logger.debug("Amount is now: %s" % amount)

    normalized_measure = measure.lower()

    if normalized_measure in ("", "b", "bytes"):
        # 1048576 bytes is one megabyte, so the converted amount must be labelled MB (not GB).
        amount = amount / bytes_per_mb
        measure = "MB"
    elif normalized_measure in ("mb", "gb"):
        measure = normalized_measure.upper()
    else:
        error_msg = (
            "Unsupported measure detected. Please provide file size in bytes, MB, or GB. "
            "Measure provided was: %s" % measure
        )
        self.logger.error(error_msg)
        raise Exception(error_msg)

    # Callers index the result with [0] and [1], so keep the integer-keyed dictionary shape.
    return {0: amount, 1: measure}
449+
385450 def get_dataset_types (self ):
386451 """
387452 Get list of dataset types associated to extract and return list.
0 commit comments