|
4 | 4 |
|
5 | 5 | import os |
6 | 6 | import re |
| 7 | +import httpx |
7 | 8 | from pathlib import Path |
8 | | -from typing import Optional, Union, BinaryIO |
| 9 | +from typing import Optional, Union, BinaryIO, Iterator, AsyncIterator, Callable |
9 | 10 | from urllib.parse import quote |
10 | | -from ..types.config import FileUploadResponse, FileInfo, FileUploadOptions |
| 11 | +from ..types.config import FileUploadResponse, FileInfo, FileUploadOptions, ProgressEvent |
11 | 12 | from ..utils.errors import ValidationError |
12 | 13 | from ..utils.validation import validate_file_id |
13 | 14 | from ..utils.progress import create_progress_event |
14 | 15 | from .http import HttpClient |
15 | 16 |
|
16 | 17 |
|
| 18 | +def _extract_filename(disposition: Optional[str]) -> Optional[str]: |
| 19 | + """Extract filename from Content-Disposition header""" |
| 20 | + if not disposition: |
| 21 | + return None |
| 22 | + matches = re.search(r'filename[^;=\n]*=(([\'"]).*?\2|[^;\n]*)', disposition) |
| 23 | + if matches and matches.group(1): |
| 24 | + return matches.group(1).strip('\'"') |
| 25 | + return None |
| 26 | + |
| 27 | + |
17 | 28 | class FilesAPI: |
18 | 29 | """Files API for upload, download, and file management""" |
19 | 30 |
|
@@ -61,8 +72,6 @@ def upload( |
61 | 72 | on_progress(create_progress_event(total, total)) |
62 | 73 |
|
63 | 74 | # Create multipart form data |
64 | | - import httpx |
65 | | - |
66 | 75 | files = {"file": (filename or "file", file_data)} |
67 | 76 |
|
68 | 77 | # Upload file using httpx directly with multipart |
@@ -125,8 +134,6 @@ async def upload_async( |
125 | 134 | on_progress(create_progress_event(total, total)) |
126 | 135 |
|
127 | 136 | # Create multipart form data |
128 | | - import httpx |
129 | | - |
130 | 137 | files = {"file": (filename or "file", file_data)} |
131 | 138 |
|
132 | 139 | # Upload file using httpx directly with multipart |
@@ -170,59 +177,146 @@ async def download_bytes_async(self, file_id: str) -> bytes: |
170 | 177 | response = await self.http.get_async(f"/files/{quote(file_id)}", raw=True) |
171 | 178 | return response.content |
172 | 179 |
|
173 | | - def download_to(self, file_id: str, output_path: Optional[str] = None) -> str: |
| 180 | + def download_stream(self, file_id: str) -> Iterator[bytes]: |
| 181 | + """Download file as a byte stream (sync)""" |
| 182 | + validate_file_id(file_id) |
| 183 | + url = f"{self.http.base_url}/files/{quote(file_id)}" |
| 184 | + headers = {"Authorization": f"Bearer {self.http.api_token}"} |
| 185 | + if self.http.user_agent: |
| 186 | + headers["User-Agent"] = self.http.user_agent |
| 187 | + |
| 188 | + with httpx.Client(timeout=self.http.timeout) as client: |
| 189 | + with client.stream("GET", url, headers=headers) as response: |
| 190 | + if not response.is_success: |
| 191 | + self.http._handle_error_response(response) |
| 192 | + self.http._extract_rate_limits(response.headers) |
| 193 | + yield from response.iter_bytes() |
| 194 | + |
| 195 | + async def download_stream_async(self, file_id: str) -> AsyncIterator[bytes]: |
| 196 | + """Download file as a byte stream (async)""" |
| 197 | + validate_file_id(file_id) |
| 198 | + url = f"{self.http.base_url}/files/{quote(file_id)}" |
| 199 | + headers = {"Authorization": f"Bearer {self.http.api_token}"} |
| 200 | + if self.http.user_agent: |
| 201 | + headers["User-Agent"] = self.http.user_agent |
| 202 | + |
| 203 | + async with httpx.AsyncClient(timeout=self.http.timeout) as client: |
| 204 | + async with client.stream("GET", url, headers=headers) as response: |
| 205 | + if not response.is_success: |
| 206 | + self.http._handle_error_response(response) |
| 207 | + self.http._extract_rate_limits(response.headers) |
| 208 | + async for chunk in response.aiter_bytes(): |
| 209 | + yield chunk |
| 210 | + |
| 211 | + def download_to( |
| 212 | + self, |
| 213 | + file_id: str, |
| 214 | + output_path: Optional[str] = None, |
| 215 | + on_progress: Optional[Callable[[ProgressEvent], None]] = None, |
| 216 | + ) -> str: |
174 | 217 | """Download file to path (sync)""" |
175 | 218 | validate_file_id(file_id) |
176 | | - response = self.http.get(f"/files/{quote(file_id)}", raw=True) |
177 | 219 |
|
178 | | - # Determine output filename |
179 | | - filename = output_path |
180 | | - if not filename: |
181 | | - # Try to get filename from Content-Disposition header |
182 | | - disposition = response.headers.get("content-disposition") |
183 | | - if disposition: |
184 | | - matches = re.search(r'filename[^;=\n]*=(([\'"]).*?\2|[^;\n]*)', disposition) |
185 | | - if matches and matches.group(1): |
186 | | - filename = matches.group(1).strip('\'"') |
| 220 | + if on_progress: |
| 221 | + url = f"{self.http.base_url}/files/{quote(file_id)}" |
| 222 | + headers = {"Authorization": f"Bearer {self.http.api_token}"} |
| 223 | + if self.http.user_agent: |
| 224 | + headers["User-Agent"] = self.http.user_agent |
| 225 | + |
| 226 | + with httpx.Client(timeout=self.http.timeout) as client: |
| 227 | + with client.stream("GET", url, headers=headers) as response: |
| 228 | + if not response.is_success: |
| 229 | + self.http._handle_error_response(response) |
| 230 | + self.http._extract_rate_limits(response.headers) |
| 231 | + |
| 232 | + filename = ( |
| 233 | + output_path |
| 234 | + or _extract_filename(response.headers.get("content-disposition")) |
| 235 | + or "result" |
| 236 | + ) |
| 237 | + output_dir = os.path.dirname(filename) |
| 238 | + if output_dir and not os.path.exists(output_dir): |
| 239 | + os.makedirs(output_dir, exist_ok=True) |
| 240 | + |
| 241 | + content_length = response.headers.get("content-length") |
| 242 | + total = int(content_length) if content_length else None |
| 243 | + loaded = 0 |
| 244 | + |
| 245 | + with open(filename, "wb") as f: |
| 246 | + for chunk in response.iter_bytes(): |
| 247 | + f.write(chunk) |
| 248 | + loaded += len(chunk) |
| 249 | + on_progress(create_progress_event(loaded, total)) |
| 250 | + |
| 251 | + return filename |
187 | 252 |
|
188 | | - filename = filename or "result" |
| 253 | + response = self.http.get(f"/files/{quote(file_id)}", raw=True) |
| 254 | + |
| 255 | + filename = output_path or _extract_filename( |
| 256 | + response.headers.get("content-disposition") |
| 257 | + ) or "result" |
189 | 258 |
|
190 | | - # Ensure directory exists |
191 | 259 | output_dir = os.path.dirname(filename) |
192 | 260 | if output_dir and not os.path.exists(output_dir): |
193 | 261 | os.makedirs(output_dir, exist_ok=True) |
194 | 262 |
|
195 | | - # Write file |
196 | 263 | with open(filename, "wb") as f: |
197 | 264 | f.write(response.content) |
198 | 265 |
|
199 | 266 | return filename |
200 | 267 |
|
201 | 268 | async def download_to_async( |
202 | | - self, file_id: str, output_path: Optional[str] = None |
| 269 | + self, |
| 270 | + file_id: str, |
| 271 | + output_path: Optional[str] = None, |
| 272 | + on_progress: Optional[Callable[[ProgressEvent], None]] = None, |
203 | 273 | ) -> str: |
204 | 274 | """Download file to path (async)""" |
205 | 275 | validate_file_id(file_id) |
206 | | - response = await self.http.get_async(f"/files/{quote(file_id)}", raw=True) |
207 | 276 |
|
208 | | - # Determine output filename |
209 | | - filename = output_path |
210 | | - if not filename: |
211 | | - # Try to get filename from Content-Disposition header |
212 | | - disposition = response.headers.get("content-disposition") |
213 | | - if disposition: |
214 | | - matches = re.search(r'filename[^;=\n]*=(([\'"]).*?\2|[^;\n]*)', disposition) |
215 | | - if matches and matches.group(1): |
216 | | - filename = matches.group(1).strip('\'"') |
| 277 | + if on_progress: |
| 278 | + url = f"{self.http.base_url}/files/{quote(file_id)}" |
| 279 | + headers = {"Authorization": f"Bearer {self.http.api_token}"} |
| 280 | + if self.http.user_agent: |
| 281 | + headers["User-Agent"] = self.http.user_agent |
| 282 | + |
| 283 | + async with httpx.AsyncClient(timeout=self.http.timeout) as client: |
| 284 | + async with client.stream("GET", url, headers=headers) as response: |
| 285 | + if not response.is_success: |
| 286 | + self.http._handle_error_response(response) |
| 287 | + self.http._extract_rate_limits(response.headers) |
| 288 | + |
| 289 | + filename = ( |
| 290 | + output_path |
| 291 | + or _extract_filename(response.headers.get("content-disposition")) |
| 292 | + or "result" |
| 293 | + ) |
| 294 | + output_dir = os.path.dirname(filename) |
| 295 | + if output_dir and not os.path.exists(output_dir): |
| 296 | + os.makedirs(output_dir, exist_ok=True) |
| 297 | + |
| 298 | + content_length = response.headers.get("content-length") |
| 299 | + total = int(content_length) if content_length else None |
| 300 | + loaded = 0 |
| 301 | + |
| 302 | + with open(filename, "wb") as f: |
| 303 | + async for chunk in response.aiter_bytes(): |
| 304 | + f.write(chunk) |
| 305 | + loaded += len(chunk) |
| 306 | + on_progress(create_progress_event(loaded, total)) |
| 307 | + |
| 308 | + return filename |
| 309 | + |
| 310 | + response = await self.http.get_async(f"/files/{quote(file_id)}", raw=True) |
217 | 311 |
|
218 | | - filename = filename or "result" |
| 312 | + filename = output_path or _extract_filename( |
| 313 | + response.headers.get("content-disposition") |
| 314 | + ) or "result" |
219 | 315 |
|
220 | | - # Ensure directory exists |
221 | 316 | output_dir = os.path.dirname(filename) |
222 | 317 | if output_dir and not os.path.exists(output_dir): |
223 | 318 | os.makedirs(output_dir, exist_ok=True) |
224 | 319 |
|
225 | | - # Write file |
226 | 320 | with open(filename, "wb") as f: |
227 | 321 | f.write(response.content) |
228 | 322 |
|
|
0 commit comments