Skip to content

Commit 1adfe16

Browse files
committed
feat: add download_stream, on_progress downloads, configurable user_agent, full test suite
- FilesAPI: add download_stream/download_stream_async, on_progress param in download_to/download_to_async
- Task: add download_stream/download_stream_async, on_progress param in download_to/download_to_async
- client: wire on_download_progress in convert/convert_async, configurable user_agent config option
- tests: 139 tests covering exceptions, validation, retry, polling, files API, task model, client
1 parent 963f0cc commit 1adfe16

12 files changed

Lines changed: 1553 additions & 43 deletions

File tree

conversiontools/api/files.py

Lines changed: 128 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -4,16 +4,27 @@
44

55
import os
66
import re
7+
import httpx
78
from pathlib import Path
8-
from typing import Optional, Union, BinaryIO
9+
from typing import Optional, Union, BinaryIO, Iterator, AsyncIterator, Callable
910
from urllib.parse import quote
10-
from ..types.config import FileUploadResponse, FileInfo, FileUploadOptions
11+
from ..types.config import FileUploadResponse, FileInfo, FileUploadOptions, ProgressEvent
1112
from ..utils.errors import ValidationError
1213
from ..utils.validation import validate_file_id
1314
from ..utils.progress import create_progress_event
1415
from .http import HttpClient
1516

1617

18+
def _extract_filename(disposition: Optional[str]) -> Optional[str]:
19+
"""Extract filename from Content-Disposition header"""
20+
if not disposition:
21+
return None
22+
matches = re.search(r'filename[^;=\n]*=(([\'"]).*?\2|[^;\n]*)', disposition)
23+
if matches and matches.group(1):
24+
return matches.group(1).strip('\'"')
25+
return None
26+
27+
1728
class FilesAPI:
1829
"""Files API for upload, download, and file management"""
1930

@@ -61,8 +72,6 @@ def upload(
6172
on_progress(create_progress_event(total, total))
6273

6374
# Create multipart form data
64-
import httpx
65-
6675
files = {"file": (filename or "file", file_data)}
6776

6877
# Upload file using httpx directly with multipart
@@ -125,8 +134,6 @@ async def upload_async(
125134
on_progress(create_progress_event(total, total))
126135

127136
# Create multipart form data
128-
import httpx
129-
130137
files = {"file": (filename or "file", file_data)}
131138

132139
# Upload file using httpx directly with multipart
@@ -170,59 +177,146 @@ async def download_bytes_async(self, file_id: str) -> bytes:
170177
response = await self.http.get_async(f"/files/{quote(file_id)}", raw=True)
171178
return response.content
172179

173-
def download_to(self, file_id: str, output_path: Optional[str] = None) -> str:
180+
def download_stream(self, file_id: str) -> Iterator[bytes]:
181+
"""Download file as a byte stream (sync)"""
182+
validate_file_id(file_id)
183+
url = f"{self.http.base_url}/files/{quote(file_id)}"
184+
headers = {"Authorization": f"Bearer {self.http.api_token}"}
185+
if self.http.user_agent:
186+
headers["User-Agent"] = self.http.user_agent
187+
188+
with httpx.Client(timeout=self.http.timeout) as client:
189+
with client.stream("GET", url, headers=headers) as response:
190+
if not response.is_success:
191+
self.http._handle_error_response(response)
192+
self.http._extract_rate_limits(response.headers)
193+
yield from response.iter_bytes()
194+
195+
async def download_stream_async(self, file_id: str) -> AsyncIterator[bytes]:
196+
"""Download file as a byte stream (async)"""
197+
validate_file_id(file_id)
198+
url = f"{self.http.base_url}/files/{quote(file_id)}"
199+
headers = {"Authorization": f"Bearer {self.http.api_token}"}
200+
if self.http.user_agent:
201+
headers["User-Agent"] = self.http.user_agent
202+
203+
async with httpx.AsyncClient(timeout=self.http.timeout) as client:
204+
async with client.stream("GET", url, headers=headers) as response:
205+
if not response.is_success:
206+
self.http._handle_error_response(response)
207+
self.http._extract_rate_limits(response.headers)
208+
async for chunk in response.aiter_bytes():
209+
yield chunk
210+
211+
def download_to(
212+
self,
213+
file_id: str,
214+
output_path: Optional[str] = None,
215+
on_progress: Optional[Callable[[ProgressEvent], None]] = None,
216+
) -> str:
174217
"""Download file to path (sync)"""
175218
validate_file_id(file_id)
176-
response = self.http.get(f"/files/{quote(file_id)}", raw=True)
177219

178-
# Determine output filename
179-
filename = output_path
180-
if not filename:
181-
# Try to get filename from Content-Disposition header
182-
disposition = response.headers.get("content-disposition")
183-
if disposition:
184-
matches = re.search(r'filename[^;=\n]*=(([\'"]).*?\2|[^;\n]*)', disposition)
185-
if matches and matches.group(1):
186-
filename = matches.group(1).strip('\'"')
220+
if on_progress:
221+
url = f"{self.http.base_url}/files/{quote(file_id)}"
222+
headers = {"Authorization": f"Bearer {self.http.api_token}"}
223+
if self.http.user_agent:
224+
headers["User-Agent"] = self.http.user_agent
225+
226+
with httpx.Client(timeout=self.http.timeout) as client:
227+
with client.stream("GET", url, headers=headers) as response:
228+
if not response.is_success:
229+
self.http._handle_error_response(response)
230+
self.http._extract_rate_limits(response.headers)
231+
232+
filename = (
233+
output_path
234+
or _extract_filename(response.headers.get("content-disposition"))
235+
or "result"
236+
)
237+
output_dir = os.path.dirname(filename)
238+
if output_dir and not os.path.exists(output_dir):
239+
os.makedirs(output_dir, exist_ok=True)
240+
241+
content_length = response.headers.get("content-length")
242+
total = int(content_length) if content_length else None
243+
loaded = 0
244+
245+
with open(filename, "wb") as f:
246+
for chunk in response.iter_bytes():
247+
f.write(chunk)
248+
loaded += len(chunk)
249+
on_progress(create_progress_event(loaded, total))
250+
251+
return filename
187252

188-
filename = filename or "result"
253+
response = self.http.get(f"/files/{quote(file_id)}", raw=True)
254+
255+
filename = output_path or _extract_filename(
256+
response.headers.get("content-disposition")
257+
) or "result"
189258

190-
# Ensure directory exists
191259
output_dir = os.path.dirname(filename)
192260
if output_dir and not os.path.exists(output_dir):
193261
os.makedirs(output_dir, exist_ok=True)
194262

195-
# Write file
196263
with open(filename, "wb") as f:
197264
f.write(response.content)
198265

199266
return filename
200267

201268
async def download_to_async(
202-
self, file_id: str, output_path: Optional[str] = None
269+
self,
270+
file_id: str,
271+
output_path: Optional[str] = None,
272+
on_progress: Optional[Callable[[ProgressEvent], None]] = None,
203273
) -> str:
204274
"""Download file to path (async)"""
205275
validate_file_id(file_id)
206-
response = await self.http.get_async(f"/files/{quote(file_id)}", raw=True)
207276

208-
# Determine output filename
209-
filename = output_path
210-
if not filename:
211-
# Try to get filename from Content-Disposition header
212-
disposition = response.headers.get("content-disposition")
213-
if disposition:
214-
matches = re.search(r'filename[^;=\n]*=(([\'"]).*?\2|[^;\n]*)', disposition)
215-
if matches and matches.group(1):
216-
filename = matches.group(1).strip('\'"')
277+
if on_progress:
278+
url = f"{self.http.base_url}/files/{quote(file_id)}"
279+
headers = {"Authorization": f"Bearer {self.http.api_token}"}
280+
if self.http.user_agent:
281+
headers["User-Agent"] = self.http.user_agent
282+
283+
async with httpx.AsyncClient(timeout=self.http.timeout) as client:
284+
async with client.stream("GET", url, headers=headers) as response:
285+
if not response.is_success:
286+
self.http._handle_error_response(response)
287+
self.http._extract_rate_limits(response.headers)
288+
289+
filename = (
290+
output_path
291+
or _extract_filename(response.headers.get("content-disposition"))
292+
or "result"
293+
)
294+
output_dir = os.path.dirname(filename)
295+
if output_dir and not os.path.exists(output_dir):
296+
os.makedirs(output_dir, exist_ok=True)
297+
298+
content_length = response.headers.get("content-length")
299+
total = int(content_length) if content_length else None
300+
loaded = 0
301+
302+
with open(filename, "wb") as f:
303+
async for chunk in response.aiter_bytes():
304+
f.write(chunk)
305+
loaded += len(chunk)
306+
on_progress(create_progress_event(loaded, total))
307+
308+
return filename
309+
310+
response = await self.http.get_async(f"/files/{quote(file_id)}", raw=True)
217311

218-
filename = filename or "result"
312+
filename = output_path or _extract_filename(
313+
response.headers.get("content-disposition")
314+
) or "result"
219315

220-
# Ensure directory exists
221316
output_dir = os.path.dirname(filename)
222317
if output_dir and not os.path.exists(output_dir):
223318
os.makedirs(output_dir, exist_ok=True)
224319

225-
# Write file
226320
with open(filename, "wb") as f:
227321
f.write(response.content)
228322

conversiontools/client.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ def __init__(self, config: ConversionToolsConfig):
4444
"max_polling_interval": config.get("max_polling_interval", 30000),
4545
"polling_backoff": config.get("polling_backoff", 1.5),
4646
"webhook_url": config.get("webhook_url"),
47+
"user_agent": config.get("user_agent", f"conversiontools-python/{VERSION}"),
4748
"on_upload_progress": config.get("on_upload_progress"),
4849
"on_download_progress": config.get("on_download_progress"),
4950
"on_conversion_progress": config.get("on_conversion_progress"),
@@ -57,7 +58,7 @@ def __init__(self, config: ConversionToolsConfig):
5758
retries=self.config["retries"],
5859
retry_delay=self.config["retry_delay"],
5960
retryable_statuses=self.config["retryable_statuses"],
60-
user_agent=f"conversiontools-python/{VERSION}",
61+
user_agent=self.config["user_agent"],
6162
)
6263

6364
# Initialize API clients
@@ -153,7 +154,7 @@ def on_progress(status: Dict[str, Any]) -> None:
153154
task.wait(wait_options)
154155

155156
# Download result
156-
output_path = task.download_to(output)
157+
output_path = task.download_to(output, on_progress=self.config["on_download_progress"])
157158

158159
return output_path
159160

@@ -245,7 +246,7 @@ def on_progress(status: Dict[str, Any]) -> None:
245246
await task.wait_async(wait_options)
246247

247248
# Download result
248-
output_path = await task.download_to_async(output)
249+
output_path = await task.download_to_async(output, on_progress=self.config["on_download_progress"])
249250

250251
return output_path
251252

conversiontools/models/task.py

Lines changed: 35 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22
Task model - High-level interface for conversion tasks
33
"""
44

5-
from typing import Optional, Dict, Any, TYPE_CHECKING
6-
from ..types.config import TaskStatus, TaskStatusResponse, WaitOptions
5+
from typing import Optional, Dict, Any, Iterator, AsyncIterator, Callable, TYPE_CHECKING
6+
from ..types.config import TaskStatus, TaskStatusResponse, WaitOptions, ProgressEvent
77
from ..utils.errors import ConversionError
88
from ..utils.polling import poll_task_status_sync, poll_task_status_async
99

@@ -204,25 +204,54 @@ async def download_bytes_async(self) -> bytes:
204204

205205
return await self._files_api.download_bytes_async(self._file_id)
206206

207-
def download_to(self, output_path: Optional[str] = None) -> str:
207+
def download_stream(self) -> Iterator[bytes]:
208+
"""Download result file as a byte stream (sync)"""
209+
if not self._file_id:
210+
raise ConversionError(
211+
"No result file available. Task may not be complete.",
212+
self.id,
213+
)
214+
215+
yield from self._files_api.download_stream(self._file_id)
216+
217+
async def download_stream_async(self) -> AsyncIterator[bytes]:
218+
"""Download result file as a byte stream (async)"""
219+
if not self._file_id:
220+
raise ConversionError(
221+
"No result file available. Task may not be complete.",
222+
self.id,
223+
)
224+
225+
async for chunk in self._files_api.download_stream_async(self._file_id):
226+
yield chunk
227+
228+
def download_to(
229+
self,
230+
output_path: Optional[str] = None,
231+
on_progress: Optional[Callable[[ProgressEvent], None]] = None,
232+
) -> str:
208233
"""Download result file to path (sync)"""
209234
if not self._file_id:
210235
raise ConversionError(
211236
"No result file available. Task may not be complete.",
212237
self.id,
213238
)
214239

215-
return self._files_api.download_to(self._file_id, output_path)
240+
return self._files_api.download_to(self._file_id, output_path, on_progress)
216241

217-
async def download_to_async(self, output_path: Optional[str] = None) -> str:
242+
async def download_to_async(
243+
self,
244+
output_path: Optional[str] = None,
245+
on_progress: Optional[Callable[[ProgressEvent], None]] = None,
246+
) -> str:
218247
"""Download result file to path (async)"""
219248
if not self._file_id:
220249
raise ConversionError(
221250
"No result file available. Task may not be complete.",
222251
self.id,
223252
)
224253

225-
return await self._files_api.download_to_async(self._file_id, output_path)
254+
return await self._files_api.download_to_async(self._file_id, output_path, on_progress)
226255

227256
def to_dict(self) -> Dict[str, Any]:
228257
"""Convert to dictionary"""

conversiontools/types/config.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@ class ConversionToolsConfig(TypedDict, total=False):
5353
max_polling_interval: float
5454
polling_backoff: float
5555
webhook_url: str
56+
user_agent: str
5657
on_upload_progress: Callable[[ProgressEvent], None]
5758
on_download_progress: Callable[[ProgressEvent], None]
5859
on_conversion_progress: Callable[[ConversionProgressEvent], None]

tests/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)