33from collections import defaultdict
44from contextlib import contextmanager
55from datetime import datetime , timezone
6+ from http import HTTPStatus
67from io import BytesIO , IOBase
78from typing import Dict , List , Optional , Tuple , Union
89
4950 AirtableImportNotRespectingConfig ,
5051 AirtableShareIsNotABase ,
5152 AirtableSkipCellValue ,
53+ FileDownloadFailed ,
5254)
5355from .import_report import (
56+ ERROR_TYPE_OTHER ,
5457 ERROR_TYPE_UNSUPPORTED_FEATURE ,
5558 SCOPE_AUTOMATIONS ,
59+ SCOPE_CELL ,
5660 SCOPE_FIELD ,
5761 SCOPE_INTERFACES ,
5862 SCOPE_VIEW ,
7882}
7983
8084
def download_airtable_file(
    name: str,
    download_file: DownloadFile,
    init_data: dict,
    request_id: str,
    cookies: dict,
    headers: dict = None,
) -> Response:
    """
    Fetches a single Airtable file and returns the HTTP response.

    Depending on `download_file.type`, the file is either requested directly
    from `download_file.url`, or through `AirtableHandler.fetch_attachment`
    using the row, column and attachment ids stored on the download file.

    :param name: The name of the file. It's only used in the error message.
    :param download_file: The DownloadFile object describing how the file must
        be fetched.
    :param init_data: The init_data returned by the initially requested shared
        base. Only forwarded when the attachment endpoint is used.
    :param request_id: The request_id returned by the initially requested
        shared base. Only forwarded when the attachment endpoint is used.
    :param cookies: The cookies dict returned by the initially requested
        shared base. Only forwarded when the attachment endpoint is used.
    :param headers: Optional headers that are sent along with the request.
    :return: The response of the download request. Its status code is either
        200 (OK) or 206 (Partial Content).
    :raises FileDownloadFailed: When the download file type is unknown, or when
        the response has any status code other than 200 or 206.
    """

    file_type = download_file.type

    if file_type == AIRTABLE_DOWNLOAD_FILE_TYPE_FETCH:
        response = requests.get(download_file.url, headers=headers)  # nosec B113
    elif file_type == AIRTABLE_DOWNLOAD_FILE_TYPE_ATTACHMENT_ENDPOINT:
        attachment_kwargs = {
            "row_id": download_file.row_id,
            "column_id": download_file.column_id,
            "attachment_id": download_file.attachment_id,
            "init_data": init_data,
            "request_id": request_id,
            "cookies": cookies,
            "headers": headers,
        }
        response = AirtableHandler.fetch_attachment(**attachment_kwargs)
    else:
        raise FileDownloadFailed(f"Unknown download file type: {file_type}")

    # A partial content response is accepted as well, because the caller can
    # provide a `Range` header via `headers`.
    accepted_statuses = (HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT)
    if response.status_code in accepted_statuses:
        return response

    raise FileDownloadFailed(
        f"File {name} could not be downloaded (HTTP {response.status_code})."
    )
133+
134+
81135class AirtableFileImport :
82136 """
83137 A file-like object (we only need open and close methods) that facilitates on-demand
@@ -98,23 +152,24 @@ def add_files(self, files_to_download):
98152
99153 @contextmanager
100154 def open (self , name ):
101- download_file = self .files_to_download .get (name )
102- if download_file is None :
155+ if name is None :
103156 raise ValueError (f"No file with name { name } found." )
104157
105- if download_file .type == AIRTABLE_DOWNLOAD_FILE_TYPE_FETCH :
106- response = requests .get (
107- download_file .url , headers = BASE_HEADERS
108- ) # nosec B113
109- elif download_file .type == AIRTABLE_DOWNLOAD_FILE_TYPE_ATTACHMENT_ENDPOINT :
110- response = AirtableHandler .fetch_attachment (
111- row_id = download_file .row_id ,
112- column_id = download_file .column_id ,
113- attachment_id = download_file .attachment_id ,
114- init_data = self .init_data ,
115- request_id = self .request_id ,
116- cookies = self .cookies ,
117- )
158+ # Files for which check failed are excluded from the
159+ # files_to_download dict
160+ # Those missing files are already included in the import report
161+ if name not in self .files_to_download :
162+ raise KeyError (f"File '{ name } ' not found in files_to_download" )
163+
164+ response = download_airtable_file (
165+ name = name ,
166+ download_file = self .files_to_download [name ],
167+ init_data = self .init_data ,
168+ request_id = self .request_id ,
169+ cookies = self .cookies ,
170+ headers = BASE_HEADERS ,
171+ )
172+
118173 stream = BytesIO (response .content )
119174 try :
120175 yield stream
@@ -177,18 +232,24 @@ def fetch_publicly_shared_base(
177232 return request_id , init_data , cookies
178233
179234 @staticmethod
180- def make_airtable_request (init_data : dict , request_id : str , ** kwargs ) -> Response :
235+ def make_airtable_request (
236+ init_data : dict , request_id : str , headers = None , ** kwargs
237+ ) -> Response :
181238 """
182239 Helper method to make a valid request to Airtable with the correct headers
183240 and params.
184241
185242 :param init_data: The init_data returned by the initially requested shared base.
186243 :param request_id: The request_id returned by the initially requested shared
187244 base.
245+ :param headers: The headers to be passed into the `requests` request.
188246 :param kwargs: The kwargs that must be passed into the `requests.get` method.
189247 :return: The requests Response object related to the request.
190248 """
191249
250+ if headers is None :
251+ headers = BASE_HEADERS
252+
192253 application_id = list (init_data ["rawApplications" ].keys ())[0 ]
193254 client_code_version = init_data ["codeVersion" ]
194255 page_load_id = init_data ["pageLoadId" ]
@@ -208,7 +269,7 @@ def make_airtable_request(init_data: dict, request_id: str, **kwargs) -> Respons
208269 "X-Requested-With" : "XMLHttpRequest" ,
209270 "x-time-zone" : "Europe/Amsterdam" ,
210271 "x-user-locale" : "en" ,
211- ** BASE_HEADERS ,
272+ ** headers ,
212273 },
213274 timeout = 3 * 60 , # it can take quite a while for Airtable to respond.
214275 ** kwargs ,
@@ -315,6 +376,7 @@ def fetch_attachment(
315376 request_id : str ,
316377 cookies : dict ,
317378 stream = True ,
379+ headers = None ,
318380 ) -> Response :
319381 """
320382 :param row_id: The Airtable row id of the attachment that must be fetched.
@@ -331,6 +393,7 @@ def fetch_attachment(
331393 :param stream: Indicates whether the request should be streamed. This could be
332394 useful if we want to show a progress bar. It will directly be passed into
333395 the `requests` request.
396+ :param headers: The headers to be passed into the `requests` request.
334397 :return: The `requests` response containing the result.
335398 """
336399
@@ -348,6 +411,7 @@ def fetch_attachment(
348411 params = {"stringifiedObjectParams" : json .dumps (stringified_object_params )},
349412 cookies = cookies ,
350413 allow_redirects = True ,
414+ headers = headers ,
351415 )
352416 return response
353417
@@ -523,14 +587,18 @@ def to_baserow_row_export(
523587 return exported_row
524588
525589 @staticmethod
526- def download_files_as_zip (
590+ def prepare_downloadable_files (
527591 files_to_download : Dict [str , DownloadFile ],
528592 init_data : dict ,
529593 request_id : str ,
530594 cookies : dict ,
531595 config : AirtableImportConfig ,
532596 progress_builder : Optional [ChildProgressBuilder ] = None ,
533597 files_buffer : Union [None , IOBase ] = None ,
598+ import_report : AirtableImportReport = None ,
599+ field_mapping_per_table : dict = None ,
600+ exported_tables : list = None ,
601+ row_id_mapping : Dict [str , Dict [str , int ]] = None ,
534602 ) -> BytesIO :
535603 """
536604 This method was used to download the files, but now it only collects
@@ -573,6 +641,49 @@ def download_files_as_zip(
573641 cookies = cookies ,
574642 headers = BASE_HEADERS ,
575643 )
644+
645+ failed_files = []
646+ for file_name , download_file in files_to_download .items ():
647+ headers = BASE_HEADERS .copy ()
648+ headers ["Range" ] = "bytes=0-5"
649+
650+ try :
651+ download_airtable_file (
652+ file_name , download_file , init_data , request_id , cookies , headers
653+ )
654+ except FileDownloadFailed :
655+ field_name = ""
656+ table_name = ""
657+ baserow_row_id = download_file .row_id
658+
659+ for table_id , field_mapping in field_mapping_per_table .items ():
660+ if download_file .column_id in field_mapping :
661+ field_info = field_mapping [download_file .column_id ]
662+ field_name = field_info ["baserow_field" ].name
663+
664+ for exported_table in exported_tables :
665+ if exported_table ["id" ] == table_id :
666+ table_name = exported_table ["name" ]
667+ break
668+
669+ if row_id_mapping and table_id in row_id_mapping :
670+ baserow_row_id = row_id_mapping [table_id ].get (
671+ download_file .row_id , download_file .row_id
672+ )
673+ break
674+
675+ import_report .add_failed (
676+ "File" ,
677+ SCOPE_CELL ,
678+ table_name ,
679+ ERROR_TYPE_OTHER ,
680+ f"Field: { field_name } , Row: { baserow_row_id } , File: { file_name } " ,
681+ )
682+ failed_files .append (file_name )
683+
684+ for file_name in failed_files :
685+ files_to_download .pop (file_name , None )
686+
576687 file_archive .add_files (files_to_download )
577688 progress .increment (state = AIRTABLE_EXPORT_JOB_DOWNLOADING_FILES )
578689
@@ -945,18 +1056,28 @@ def to_baserow_database_export(
9451056 ** DatabaseExportSerializedStructure .database (tables = exported_tables )
9461057 )
9471058
1059+ report_items_count = len (import_report .items )
1060+
9481061 # After all the tables have been converted to Baserow format, we must
9491062 # download all the user files. Because we first want the whole conversion to
9501063 # be completed and because we want this to be added to the progress bar, this is
9511064 # done last.
952- user_files_zip = cls .download_files_as_zip (
1065+ user_files_zip = cls .prepare_downloadable_files (
9531066 files_to_download ,
9541067 init_data ,
9551068 request_id ,
9561069 cookies ,
9571070 config ,
9581071 progress .create_child_builder (represents_progress = 500 ),
9591072 download_files_buffer ,
1073+ import_report ,
1074+ field_mapping_per_table ,
1075+ exported_tables ,
1076+ row_id_mapping ,
1077+ )
1078+
1079+ import_report .append_items_to_exported_table (
1080+ exported_database , import_report .items [report_items_count :]
9601081 )
9611082
9621083 return exported_database , user_files_zip
0 commit comments