Skip to content

Commit a3c32f5

Browse files
authored
feat(api): Add the possibility to save results from external source (#112)
* feat(api): Add the possibility to save results from external source
* chore: fix linting issues
* feat(batch): add batch import result feature
* chore: fix linting and poetry issues
* chore(api): Bump API version to 3.8.0a0
* fix(api): fix worker to handle multiple queues
* fix(scraper): To work with newest version of chromium
1 parent 433db70 commit a3c32f5

File tree

33 files changed

+2356
-2058
lines changed

33 files changed

+2356
-2058
lines changed

.devcontainer/devcontainer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "Ecoindex python full stack dev container",
33
"image": "mcr.microsoft.com/devcontainers/python:3.12",
4-
"postCreateCommand": "pipx install poetry && poetry self add poetry-multiproject-plugin && poetry self add poetry-polylith-plugin",
4+
"postCreateCommand": "pipx install poetry==1.8.5 && poetry self add poetry-multiproject-plugin && poetry self add poetry-polylith-plugin",
55
"features": {
66
"ghcr.io/audacioustux/devcontainers/taskfile": {},
77
"ghcr.io/devcontainers/features/docker-in-docker:2": {

.vscode/launch.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
// Use IntelliSense to learn about possible attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
8+
{
9+
"name": "Python Debugger: FastAPI",
10+
"type": "debugpy",
11+
"request": "launch",
12+
"module": "uvicorn",
13+
"args": [
14+
"ecoindex.backend.main:app",
15+
"--reload"
16+
],
17+
"jinja": true
18+
}
19+
]
20+
}

bases/ecoindex/backend/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3.7.0
1+
3.8.0a0

bases/ecoindex/backend/dependencies/validation.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from typing import Annotated
2+
3+
from ecoindex.config.settings import Settings
4+
from fastapi import Header, HTTPException, status
5+
6+
7+
def validate_api_key_batch(
8+
api_key: Annotated[
9+
str,
10+
Header(alias="X-Api-Key"),
11+
],
12+
):
13+
if not api_key:
14+
raise HTTPException(
15+
status_code=status.HTTP_403_FORBIDDEN,
16+
detail="Invalid API key",
17+
)
18+
19+
for authorized_api_key in Settings().API_KEYS_BATCH:
20+
if api_key == authorized_api_key["key"]:
21+
return authorized_api_key
22+
23+
raise HTTPException(
24+
status_code=status.HTTP_403_FORBIDDEN,
25+
detail="Invalid API key",
26+
)

bases/ecoindex/backend/routers/bff.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,5 +104,5 @@ async def get_latest_result_redirect(
104104
)
105105

106106
return RedirectResponse(
107-
url=f"{Settings().FRONTEND_BASE_URL}/resultat/?id={latest_result.latest_result.id}"
107+
url=f"{Settings().FRONTEND_BASE_URL}/resultat/?id={latest_result.latest_result.id}" # type: ignore
108108
)

bases/ecoindex/backend/routers/ecoindex.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,9 @@ async def get_ecoindex_analysis_list(
7171
page=pagination.page,
7272
size=pagination.size,
7373
sort_params=await get_sort_parameters(
74-
query_params=sort, model=ApiEcoindex
75-
), # type: ignore
74+
query_params=sort,
75+
model=ApiEcoindex, # type: ignore
76+
),
7677
)
7778
total_results = await get_count_analysis_db(
7879
session=session,

bases/ecoindex/backend/routers/tasks.py

Lines changed: 78 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,22 @@
11
from json import loads
2+
from typing import Annotated
23

34
import requests
45
from celery.result import AsyncResult
6+
from ecoindex.backend.dependencies.validation import validate_api_key_batch
57
from ecoindex.backend.models.dependencies_parameters.id import IdParameter
68
from ecoindex.backend.utils import check_quota
79
from ecoindex.config.settings import Settings
810
from ecoindex.database.engine import get_session
11+
from ecoindex.database.models import ApiEcoindexes
912
from ecoindex.models import WebPage
1013
from ecoindex.models.enums import TaskStatus
1114
from ecoindex.models.response_examples import (
1215
example_daily_limit_response,
1316
example_host_unreachable,
1417
)
15-
from ecoindex.models.tasks import QueueTaskApi, QueueTaskResult
16-
from ecoindex.worker.tasks import ecoindex_task
18+
from ecoindex.models.tasks import QueueTaskApi, QueueTaskApiBatch, QueueTaskResult
19+
from ecoindex.worker.tasks import ecoindex_batch_import_task, ecoindex_task
1720
from ecoindex.worker_component import app as task_app
1821
from fastapi import APIRouter, Depends, HTTPException, Response, status
1922
from fastapi.params import Body
@@ -37,18 +40,23 @@
3740
)
3841
async def add_ecoindex_analysis_task(
3942
response: Response,
40-
web_page: WebPage = Body(
41-
default=...,
42-
title="Web page to analyze defined by its url and its screen resolution",
43-
example=WebPage(url="https://www.ecoindex.fr", width=1920, height=1080),
44-
),
43+
web_page: Annotated[
44+
WebPage,
45+
Body(
46+
default=...,
47+
title="Web page to analyze defined by its url and its screen resolution",
48+
example=WebPage(url="https://www.ecoindex.fr", width=1920, height=1080),
49+
),
50+
],
4551
session: AsyncSession = Depends(get_session),
4652
) -> str:
4753
if Settings().DAILY_LIMIT_PER_HOST:
4854
remaining_quota = await check_quota(
4955
session=session, host=web_page.get_url_host()
5056
)
51-
response.headers["X-Remaining-Daily-Requests"] = str(remaining_quota - 1)
57+
58+
if remaining_quota:
59+
response.headers["X-Remaining-Daily-Requests"] = str(remaining_quota - 1)
5260

5361
if (
5462
Settings().EXCLUDED_HOSTS
@@ -63,13 +71,12 @@ async def add_ecoindex_analysis_task(
6371
r = requests.head(url=web_page.url, timeout=5)
6472
r.raise_for_status()
6573
except Exception:
66-
print(f"The URL {web_page.url} is not reachable")
6774
raise HTTPException(
6875
status_code=521,
6976
detail=f"The URL {web_page.url} is unreachable. Are you really sure of this url? 🤔",
7077
)
7178

72-
task_result = ecoindex_task.delay(
79+
task_result = ecoindex_task.delay( # type: ignore
7380
url=str(web_page.url), width=web_page.width, height=web_page.height
7481
)
7582

@@ -93,7 +100,7 @@ async def get_ecoindex_analysis_task_by_id(
93100
t = AsyncResult(id=str(id), app=task_app)
94101

95102
task_response = QueueTaskApi(
96-
id=t.id,
103+
id=str(t.id),
97104
status=t.state,
98105
)
99106

@@ -125,3 +132,63 @@ async def delete_ecoindex_analysis_task_by_id(
125132
res = task_app.control.revoke(id, terminate=True, signal="SIGKILL")
126133

127134
return res
135+
136+
137+
@router.post(
138+
name="Save ecoindex analysis from external source in batch mode",
139+
path="/batch",
140+
response_description="Identifier of the task that has been created in queue",
141+
responses={
142+
status.HTTP_201_CREATED: {"model": str},
143+
status.HTTP_403_FORBIDDEN: {"model": str},
144+
},
145+
description="This save ecoindex analysis from external source in batch mode. Limited to 100 entries at a time",
146+
status_code=status.HTTP_201_CREATED,
147+
)
148+
async def add_ecoindex_analysis_task_batch(
149+
results: Annotated[
150+
ApiEcoindexes,
151+
Body(
152+
default=...,
153+
title="List of ecoindex analysis results to save",
154+
example=[],
155+
min_length=1,
156+
max_length=100,
157+
),
158+
],
159+
batch_key: str = Depends(validate_api_key_batch),
160+
):
161+
task_result = ecoindex_batch_import_task.delay( # type: ignore
162+
results=[result.model_dump() for result in results],
163+
source=batch_key["source"], # type: ignore
164+
)
165+
166+
return task_result.id
167+
168+
169+
@router.get(
170+
name="Get ecoindex analysis batch task by id",
171+
path="/batch/{id}",
172+
responses={
173+
status.HTTP_200_OK: {"model": QueueTaskApiBatch},
174+
status.HTTP_425_TOO_EARLY: {"model": QueueTaskApiBatch},
175+
},
176+
response_description="Get one ecoindex batch task result by its id",
177+
description="This returns an ecoindex batch task result given by its unique identifier",
178+
)
179+
async def get_ecoindex_analysis_batch_task_by_id(
180+
response: Response,
181+
id: IdParameter,
182+
_: str = Depends(validate_api_key_batch),
183+
) -> QueueTaskApiBatch:
184+
t = AsyncResult(id=str(id), app=task_app)
185+
186+
task_response = QueueTaskApiBatch(
187+
id=str(t.id),
188+
status=t.state,
189+
)
190+
191+
if t.state == TaskStatus.PENDING:
192+
response.status_code = status.HTTP_425_TOO_EARLY
193+
194+
return task_response

bases/ecoindex/backend/utils/__init__.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ async def format_exception_response(exception: Exception) -> ExceptionResponse:
1919
return ExceptionResponse(
2020
exception=type(exception).__name__,
2121
args=[arg for arg in exception.args if arg] if exception.args else [],
22-
message=exception.msg if hasattr(exception, "msg") else None,
22+
message=exception.msg if hasattr(exception, "msg") else None, # type: ignore
2323
)
2424

2525

@@ -45,7 +45,7 @@ async def get_sort_parameters(query_params: list[str], model: BaseModel) -> list
4545
result = []
4646

4747
for query_param in query_params:
48-
pattern = re.compile("^\w+:(asc|desc)$")
48+
pattern = re.compile("^\w+:(asc|desc)$") # type: ignore
4949

5050
if not re.fullmatch(pattern, query_param):
5151
validation_error.append(
@@ -67,8 +67,9 @@ async def get_sort_parameters(query_params: list[str], model: BaseModel) -> list
6767
"type": "value_error.sort",
6868
}
6969
)
70+
continue
7071

71-
result.append(Sort(clause=sort_params[0], sort=sort_params[1]))
72+
result.append(Sort(clause=sort_params[0], sort=sort_params[1])) # type: ignore
7273

7374
if validation_error:
7475
raise HTTPException(
@@ -94,7 +95,7 @@ async def check_quota(
9495
raise QuotaExceededException(
9596
limit=Settings().DAILY_LIMIT_PER_HOST,
9697
host=host,
97-
latest_result=loads(latest_result.model_dump_json()),
98+
latest_result=loads(latest_result.model_dump_json() or "{}"), # type: ignore
9899
)
99100

100101
return Settings().DAILY_LIMIT_PER_HOST - count_daily_request_per_host

bases/ecoindex/cli/app.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -138,30 +138,32 @@ def analyze(
138138
secho(f"⏲️ Crawling root url {url[0]} -> Wait a minute!", fg=colors.MAGENTA)
139139
with spinner():
140140
urls = get_urls_recursive(main_url=url[0])
141-
urls = urls if urls else url
141+
urls = urls if urls else url # type: ignore
142142

143143
(
144144
file_prefix,
145145
input_file,
146146
logger_file,
147-
) = get_file_prefix_input_file_logger_file(urls=urls)
147+
) = get_file_prefix_input_file_logger_file(urls=urls) # type: ignore
148148

149149
elif url:
150-
urls = get_url_from_args(urls_arg=url)
150+
urls = get_url_from_args(urls_arg=url) # type: ignore
151151
(
152152
file_prefix,
153153
input_file,
154154
logger_file,
155-
) = get_file_prefix_input_file_logger_file(urls=urls, tmp_folder=tmp_folder)
155+
) = get_file_prefix_input_file_logger_file(urls=urls, tmp_folder=tmp_folder) # type: ignore
156156

157157
elif urls_file:
158-
urls = get_urls_from_file(urls_file=urls_file)
158+
urls = get_urls_from_file(urls_file=urls_file) # type: ignore
159159
(
160160
file_prefix,
161161
input_file,
162162
logger_file,
163163
) = get_file_prefix_input_file_logger_file(
164-
urls=urls, urls_file=urls_file, tmp_folder=tmp_folder
164+
urls=urls, # type: ignore
165+
urls_file=urls_file,
166+
tmp_folder=tmp_folder,
165167
)
166168
elif sitemap:
167169
secho(
@@ -172,14 +174,14 @@ def analyze(
172174
file_prefix,
173175
input_file,
174176
logger_file,
175-
) = get_file_prefix_input_file_logger_file(urls=urls)
177+
) = get_file_prefix_input_file_logger_file(urls=urls) # type: ignore
176178

177179
else:
178180
secho("🔥 You must provide an url...", fg=colors.RED)
179181
raise Exit(code=1)
180182

181183
if input_file:
182-
write_urls_to_file(file_prefix=file_prefix, urls=urls)
184+
write_urls_to_file(file_prefix=file_prefix, urls=urls) # type: ignore
183185
secho(f"📁️ Urls recorded in file `{input_file}`")
184186

185187
if logger_file:
@@ -266,13 +268,13 @@ def analyze(
266268

267269
Path(output_folder).mkdir(parents=True, exist_ok=True)
268270
write_results_to_file(
269-
filename=output_filename, results=results, export_format=export_format
271+
filename=str(output_filename), results=results, export_format=export_format
270272
)
271273
secho(f"🙌️ File {output_filename} written !", fg=colors.GREEN)
272274
if html_report:
273275
Report(
274276
results_file=output_filename,
275-
output_path=output_folder,
277+
output_path=str(output_folder),
276278
domain=file_prefix,
277279
date=time_now,
278280
language=html_report_language,
@@ -317,7 +319,7 @@ def report(
317319
output_folder = output_folder if output_folder else dirname(results_file)
318320

319321
Report(
320-
results_file=results_file,
322+
results_file=Path(results_file),
321323
output_path=output_folder,
322324
domain=domain,
323325
date=datetime.now(),

0 commit comments

Comments
 (0)