From 1203c87507695ee1880dcc6d2f361572a826a070 Mon Sep 17 00:00:00 2001 From: TOMONORI ENDOU Date: Mon, 9 Jun 2025 09:31:00 +0900 Subject: [PATCH 1/2] extract metadata from redirect headers to avoid redundant WB req --- mfr/providers/osf/provider.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mfr/providers/osf/provider.py b/mfr/providers/osf/provider.py index 4745e8ecc..8a6f16e49 100644 --- a/mfr/providers/osf/provider.py +++ b/mfr/providers/osf/provider.py @@ -34,6 +34,7 @@ def __init__(self, request, url, action=None): super().__init__(request, url, action) self.download_url = None self.headers = {} + self._cached_metadata = None # capture request authorization self.cookies = dict(self.request.cookies) @@ -60,7 +61,10 @@ async def metadata(self): """ download_url = await self._fetch_download_url() logger.debug('download_url::{}'.format(download_url)) - if '/file?' in download_url: + if self._cached_metadata: + metadata = self._cached_metadata + self.metrics.add('metadata.wb_api', 'cached_from_head') + elif '/file?' 
in download_url: # URL is for WaterButler v0 API # TODO Remove this when API v0 is officially deprecated self.metrics.add('metadata.wb_api', 'v0') @@ -171,7 +175,7 @@ async def _fetch_download_url(self): self.metrics.add('download_url.orig_type', 'osf') # make request to osf, don't follow, store waterbutler download url request = await self._make_request( - 'GET', + 'HEAD', self.url, allow_redirects=False, headers={ @@ -189,6 +193,7 @@ async def _fetch_download_url(self): code=request.status, ) self.download_url = request.headers['location'] + self._cached_metadata = {'data': json.loads(request.headers['x-file-metadata'])} self.metrics.add('download_url.derived_url', str(self.download_url)) @@ -206,3 +211,4 @@ async def _make_request(self, method, url, *args, **kwargs): kwargs.setdefault('headers', {})['Authorization'] = 'Bearer ' + self.token return await aiohttp.request(method, url, *args, **kwargs) + \ No newline at end of file From b46c9af9209912af3dea7c488be2a7545d7c08c1 Mon Sep 17 00:00:00 2001 From: TOMONORI ENDOU Date: Sun, 12 Oct 2025 15:47:24 +0900 Subject: [PATCH 2/2] Remove stray trailing line at end of provider.py --- mfr/providers/osf/provider.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mfr/providers/osf/provider.py b/mfr/providers/osf/provider.py index 8a6f16e49..5f4d5c6f8 100644 --- a/mfr/providers/osf/provider.py +++ b/mfr/providers/osf/provider.py @@ -211,4 +211,3 @@ async def _make_request(self, method, url, *args, **kwargs): kwargs.setdefault('headers', {})['Authorization'] = 'Bearer ' + self.token return await aiohttp.request(method, url, *args, **kwargs) - \ No newline at end of file