
Commit 0cfdc22

Merge branch '3581-attachments-expire-before-being-imported' into 'develop'
Attachments are not available during import

Closes baserow#3581

See merge request baserow/baserow!3518
2 parents 98dba5f + 60e9703 commit 0cfdc22

5 files changed, +220 -27 lines

backend/src/baserow/contrib/database/airtable/exceptions.py

Lines changed: 8 additions & 0 deletions

@@ -25,3 +25,11 @@ class AirtableSkipFilter(Exception):
     """
     Raised when an Airtable filter is not compatible and must be skipped.
     """
+
+
+class FileDownloadFailed(Exception):
+    """Raised when a file download fails."""
+
+    def __init__(self, message):
+        self.message = message
+        super().__init__(message)
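
A minimal sketch of how the new exception is meant to be used: the message is kept on the instance so callers can log it or feed it into the import report. The probe() helper and the URL below are hypothetical.

    from baserow.contrib.database.airtable.exceptions import FileDownloadFailed

    def probe(url):
        # Hypothetical stand-in for a download that came back with HTTP 410.
        raise FileDownloadFailed(f"File {url} could not be downloaded (HTTP 410).")

    try:
        probe("https://dl.airtable.com/attachment/example.png")
    except FileDownloadFailed as exc:
        print(exc.message)  # the stored, human-readable reason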

backend/src/baserow/contrib/database/airtable/handler.py

Lines changed: 140 additions & 19 deletions

@@ -3,6 +3,7 @@
 from collections import defaultdict
 from contextlib import contextmanager
 from datetime import datetime, timezone
+from http import HTTPStatus
 from io import BytesIO, IOBase
 from typing import Dict, List, Optional, Tuple, Union
 
@@ -49,10 +50,13 @@
     AirtableImportNotRespectingConfig,
     AirtableShareIsNotABase,
     AirtableSkipCellValue,
+    FileDownloadFailed,
 )
 from .import_report import (
+    ERROR_TYPE_OTHER,
     ERROR_TYPE_UNSUPPORTED_FEATURE,
     SCOPE_AUTOMATIONS,
+    SCOPE_CELL,
     SCOPE_FIELD,
     SCOPE_INTERFACES,
     SCOPE_VIEW,
@@ -78,6 +82,56 @@
 }
 
 
+def download_airtable_file(
+    name: str,
+    download_file: DownloadFile,
+    init_data: dict,
+    request_id: str,
+    cookies: dict,
+    headers: dict = None,
+) -> Response:
+    """
+    Downloads a file from Airtable using either direct URL fetch or
+    attachment endpoint.
+
+    :param name: The name of the file to download.
+    :param download_file: The DownloadFile object containing download
+        information
+    :param init_data: The init_data returned by the initially
+        requested shared base
+    :param request_id: The request_id returned by the initially
+        requested shared base
+    :param cookies: The cookies dict returned by the initially
+        requested shared base
+    :param headers: Optional headers to use for the request
+    :return: The response object from the download request
+    :raises FileDownloadFailed: When the file could not be downloaded.
+    """
+
+    if download_file.type == AIRTABLE_DOWNLOAD_FILE_TYPE_FETCH:
+        response = requests.get(download_file.url, headers=headers)  # nosec B113
+    elif download_file.type == AIRTABLE_DOWNLOAD_FILE_TYPE_ATTACHMENT_ENDPOINT:
+        response = AirtableHandler.fetch_attachment(
+            row_id=download_file.row_id,
+            column_id=download_file.column_id,
+            attachment_id=download_file.attachment_id,
+            init_data=init_data,
+            request_id=request_id,
+            cookies=cookies,
+            headers=headers,
+        )
+    else:
+        raise FileDownloadFailed(
+            f"Unknown download file type: {download_file.type}",
+        )
+    if response.status_code not in [HTTPStatus.OK, HTTPStatus.PARTIAL_CONTENT]:
+        raise FileDownloadFailed(
+            f"File {name} could not be downloaded (HTTP {response.status_code}).",
+        )
+
+    return response
+
+
 class AirtableFileImport:
     """
     A file-like object (we only need open and close methods) that facilitates on-demand
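
A hedged sketch of calling the new helper for a single entry of the files_to_download mapping; the file name is invented, and init_data, request_id, and cookies are assumed to come from fetch_publicly_shared_base as in the rest of this module.

    name = "logo.png"  # hypothetical attachment name
    try:
        response = download_airtable_file(
            name=name,
            download_file=files_to_download[name],
            init_data=init_data,
            request_id=request_id,
            cookies=cookies,
            headers=BASE_HEADERS,
        )
    except FileDownloadFailed as exc:
        # Raised for an unknown download type or any status other than
        # HTTP 200/206, with a message suitable for the import report.
        print(exc.message)
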
@@ -98,23 +152,24 @@ def add_files(self, files_to_download):
 
     @contextmanager
     def open(self, name):
-        download_file = self.files_to_download.get(name)
-        if download_file is None:
+        if name is None:
             raise ValueError(f"No file with name {name} found.")
 
-        if download_file.type == AIRTABLE_DOWNLOAD_FILE_TYPE_FETCH:
-            response = requests.get(
-                download_file.url, headers=BASE_HEADERS
-            )  # nosec B113
-        elif download_file.type == AIRTABLE_DOWNLOAD_FILE_TYPE_ATTACHMENT_ENDPOINT:
-            response = AirtableHandler.fetch_attachment(
-                row_id=download_file.row_id,
-                column_id=download_file.column_id,
-                attachment_id=download_file.attachment_id,
-                init_data=self.init_data,
-                request_id=self.request_id,
-                cookies=self.cookies,
-            )
+        # Files for which check failed are excluded from the
+        # files_to_download dict
+        # Those missing files are already included in the import report
+        if name not in self.files_to_download:
+            raise KeyError(f"File '{name}' not found in files_to_download")
+
+        response = download_airtable_file(
+            name=name,
+            download_file=self.files_to_download[name],
+            init_data=self.init_data,
+            request_id=self.request_id,
+            cookies=self.cookies,
+            headers=BASE_HEADERS,
+        )
+
         stream = BytesIO(response.content)
         try:
             yield stream
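
A usage sketch of the reworked context manager, assuming file_archive is an AirtableFileImport that was already populated via add_files(files_to_download); "photo.jpg" is a made-up name. Files whose availability check failed are no longer present in files_to_download, so the lookup raises KeyError instead of attempting a download.

    try:
        with file_archive.open("photo.jpg") as stream:
            data = stream.read()  # BytesIO holding the downloaded content
    except KeyError:
        # The file was dropped after a failed availability check; the
        # failure is already listed in the Airtable import report.
        data = None
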
@@ -177,18 +232,24 @@ def fetch_publicly_shared_base(
         return request_id, init_data, cookies
 
     @staticmethod
-    def make_airtable_request(init_data: dict, request_id: str, **kwargs) -> Response:
+    def make_airtable_request(
+        init_data: dict, request_id: str, headers=None, **kwargs
+    ) -> Response:
         """
         Helper method to make a valid request to to Airtable with the correct headers
         and params.
 
         :param init_data: The init_data returned by the initially requested shared base.
         :param request_id: The request_id returned by the initially requested shared
             base.
+        :param headers: The headers to be passed into the `requests` request.
         :param kwargs: THe kwargs that must be passed into the `requests.get` method.
         :return: The requests Response object related to the request.
         """
 
+        if headers is None:
+            headers = BASE_HEADERS
+
         application_id = list(init_data["rawApplications"].keys())[0]
         client_code_version = init_data["codeVersion"]
         page_load_id = init_data["pageLoadId"]
@@ -208,7 +269,7 @@ def make_airtable_request(init_data: dict, request_id: str, **kwargs) -> Response:
                 "X-Requested-With": "XMLHttpRequest",
                 "x-time-zone": "Europe/Amsterdam",
                 "x-user-locale": "en",
-                **BASE_HEADERS,
+                **headers,
             },
             timeout=3 * 60,  # it can take quite a while for Airtable to respond.
             **kwargs,
@@ -315,6 +376,7 @@ def fetch_attachment(
         request_id: str,
         cookies: dict,
         stream=True,
+        headers=None,
     ) -> Response:
         """
         :param row_id: The Airtable row id of the attachment that must be fetched.
@@ -331,6 +393,7 @@
         :param stream: Indicates whether the request should be streamed. This could be
             useful if we want to show a progress bar. It will directly be passed into
             the `requests` request.
+        :param headers: The headers to be passed into the `requests` request.
         :return: The `requests` response containing the result.
         """
 
@@ -348,6 +411,7 @@
             params={"stringifiedObjectParams": json.dumps(stringified_object_params)},
             cookies=cookies,
             allow_redirects=True,
+            headers=headers,
         )
         return response
 
@@ -523,14 +587,18 @@ def to_baserow_row_export(
         return exported_row
 
     @staticmethod
-    def download_files_as_zip(
+    def prepare_downloadable_files(
         files_to_download: Dict[str, DownloadFile],
         init_data: dict,
         request_id: str,
         cookies: dict,
         config: AirtableImportConfig,
         progress_builder: Optional[ChildProgressBuilder] = None,
         files_buffer: Union[None, IOBase] = None,
+        import_report: AirtableImportReport = None,
+        field_mapping_per_table: dict = None,
+        exported_tables: list = None,
+        row_id_mapping: Dict[str, Dict[str, int]] = None,
     ) -> BytesIO:
         """
         This method was used to download the files, but now it only collects
@@ -573,6 +641,49 @@ def download_files_as_zip(
             cookies=cookies,
             headers=BASE_HEADERS,
         )
+
+        failed_files = []
+        for file_name, download_file in files_to_download.items():
+            headers = BASE_HEADERS.copy()
+            headers["Range"] = "bytes=0-5"
+
+            try:
+                download_airtable_file(
+                    file_name, download_file, init_data, request_id, cookies, headers
+                )
+            except FileDownloadFailed:
+                field_name = ""
+                table_name = ""
+                baserow_row_id = download_file.row_id
+
+                for table_id, field_mapping in field_mapping_per_table.items():
+                    if download_file.column_id in field_mapping:
+                        field_info = field_mapping[download_file.column_id]
+                        field_name = field_info["baserow_field"].name
+
+                        for exported_table in exported_tables:
+                            if exported_table["id"] == table_id:
+                                table_name = exported_table["name"]
+                                break
+
+                        if row_id_mapping and table_id in row_id_mapping:
+                            baserow_row_id = row_id_mapping[table_id].get(
+                                download_file.row_id, download_file.row_id
+                            )
+                        break
+
+                import_report.add_failed(
+                    "File",
+                    SCOPE_CELL,
+                    table_name,
+                    ERROR_TYPE_OTHER,
+                    f"Field: {field_name}, Row: {baserow_row_id}, File: {file_name}",
+                )
+                failed_files.append(file_name)
+
+        for file_name in failed_files:
+            files_to_download.pop(file_name, None)
+
         file_archive.add_files(files_to_download)
         progress.increment(state=AIRTABLE_EXPORT_JOB_DOWNLOADING_FILES)
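
The "Range: bytes=0-5" header keeps this availability pass cheap: both 200 and 206 Partial Content are accepted by download_airtable_file, while an expired attachment URL produces an error status that becomes a FileDownloadFailed and ends up in the import report. The same probe could be issued by hand through fetch_attachment; a sketch with made-up identifiers:

    range_headers = BASE_HEADERS.copy()
    range_headers["Range"] = "bytes=0-5"  # only probe the first six bytes

    response = AirtableHandler.fetch_attachment(
        row_id="recXXXXXXXXXXXXXX",          # hypothetical ids
        column_id="fldXXXXXXXXXXXXXX",
        attachment_id="attXXXXXXXXXXXXXX",
        init_data=init_data,
        request_id=request_id,
        cookies=cookies,
        headers=range_headers,
    )
    print(response.status_code)  # 200/206 while the attachment is available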

@@ -945,18 +1056,28 @@ def to_baserow_database_export(
             **DatabaseExportSerializedStructure.database(tables=exported_tables)
         )
 
+        report_items_count = len(import_report.items)
+
         # After all the tables have been converted to Baserow format, we must
         # download all the user files. Because we first want to the whole conversion to
         # be completed and because we want this to be added to the progress bar, this is
         # done last.
-        user_files_zip = cls.download_files_as_zip(
+        user_files_zip = cls.prepare_downloadable_files(
             files_to_download,
             init_data,
             request_id,
             cookies,
             config,
             progress.create_child_builder(represents_progress=500),
             download_files_buffer,
+            import_report,
+            field_mapping_per_table,
+            exported_tables,
+            row_id_mapping,
+        )
+
+        import_report.append_items_to_exported_table(
+            exported_database, import_report.items[report_items_count:]
         )
 
         return exported_database, user_files_zip
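
Capturing report_items_count before preparing the files means that only the items added by the availability check are appended to the report table already serialized into exported_database. A tiny sketch of that slicing, with plain strings standing in for ImportReportFailedItem objects:

    items = ["field skipped", "view skipped"]    # recorded before the export
    report_items_count = len(items)              # remember the cut-off

    items.append("file logo.png not available")  # recorded during file prep
    items.append("file photo.jpg not available")

    assert items[report_items_count:] == [
        "file logo.png not available",
        "file photo.jpg not available",
    ]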

backend/src/baserow/contrib/database/airtable/import_report.py

Lines changed: 51 additions & 2 deletions

@@ -13,6 +13,9 @@
 from baserow.contrib.database.views.registries import view_type_registry
 from baserow.core.constants import BASEROW_COLORS
 
+REPORT_TABLE_ID = "report"
+REPORT_TABLE_NAME = "Airtable import report"
+
 SCOPE_FIELD = SelectOption(id="scope_field", value="Field", color="light-blue", order=1)
 SCOPE_CELL = SelectOption(id="scope_cell", value="Cell", color="light-green", order=2)
 SCOPE_VIEW = SelectOption(id="scope_view", value="View", color="light-cyan", order=3)
@@ -161,8 +164,8 @@ def get_baserow_export_table(self, order: int) -> dict:
             exported_rows.append(row)
 
         exported_table = DatabaseExportSerializedStructure.table(
-            id="report",
-            name="Airtable import report",
+            id=REPORT_TABLE_ID,
+            name=REPORT_TABLE_NAME,
             order=order,
             fields=exported_fields,
             views=exported_views,
@@ -172,3 +175,49 @@
         )
 
         return exported_table
+
+    def append_items_to_exported_table(
+        self, exported_database: dict, items: list
+    ) -> None:
+        """
+        Appends new items to an existing exported table.
+
+        :param exported_database: The exported database
+        :param items: List of ImportReportFailedItem to append
+        """
+
+        if not items:
+            return
+
+        report_table = next(
+            (
+                table
+                for table in exported_database["tables"]
+                if table["id"] == REPORT_TABLE_ID
+            ),
+            None,
+        )
+        if not report_table:
+            return
+
+        current_row_count = len(report_table["rows"])
+        table_select_options = {
+            opt["value"]: opt for opt in report_table["fields"][2]["select_options"]
+        }
+
+        for index, item in enumerate(items, start=current_row_count + 1):
+            table_select_option = table_select_options.get(item.table)
+            row = DatabaseExportSerializedStructure.row(
+                id=index,
+                order=f"{index}.00000000000000000000",
+                created_on=None,
+                updated_on=None,
+            )
+            row["field_object_name"] = item.object_name
+            row["field_scope"] = item.scope.id
+            row["field_table"] = (
+                table_select_option["id"] if table_select_option else None
+            )
+            row["field_error_type"] = item.error_type.id
+            row["field_message"] = item.message
+            report_table["rows"].append(row)
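
A hedged sketch of driving the new method: the add_failed arguments mirror how handler.py calls it, exported_database is assumed to be an export dict that already contains the serialized report table, and instantiating AirtableImportReport without arguments is an assumption.

    report = AirtableImportReport()  # assumed no-arg constructor
    before = len(report.items)

    report.add_failed(
        "File",
        SCOPE_CELL,
        "Projects",  # hypothetical table name
        ERROR_TYPE_OTHER,
        "Field: Attachments, Row: 42, File: logo.png",
    )

    # Append only the newly recorded failures to the already-exported table.
    report.append_items_to_exported_table(exported_database, report.items[before:])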

backend/src/baserow/contrib/database/fields/field_types.py

Lines changed: 13 additions & 6 deletions

@@ -3971,12 +3971,19 @@ def set_import_serialized_value(
             if files_zip is None:
                 files.append(file)
             else:
-                with files_zip.open(file["name"]) as stream:
-                    # Try to upload the user file with the original name to make sure
-                    # that if the was already uploaded, it will not be uploaded again.
-                    user_file = user_file_handler.upload_user_file(
-                        None, file["original_name"], stream, storage=storage
-                    )
+                try:
+                    with files_zip.open(file["name"]) as stream:
+                        # Try to upload the user file with the original name
+                        # to make sure that if the was already uploaded, it will
+                        # not be uploaded again.
+                        user_file = user_file_handler.upload_user_file(
+                            None, file["original_name"], stream, storage=storage
+                        )
+                except KeyError:
+                    # File not found in zip archive - skip this file and
+                    # let the import process report handle missing files
+                    # appropriately
+                    continue
 
             value = user_file.serialize()
             value["visible_name"] = file["visible_name"]
Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+{
+    "type": "bug",
+    "message": "Log files that cannot be downloaaded during import",
+    "domain": "database",
+    "issue_number": 3581,
+    "bullet_points": [],
+    "created_at": "2025-06-10"
+}
