Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions cognition_objects/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,15 +185,27 @@ def get_integration_progress(
) -> float:
integration = get_by_id(integration_id)
count_all_records = integration_records_bo.count(integration)
all_tasks = get_all_etl_tasks(integration_id)
finished_tasks = [task for task in all_tasks if task.state in FINISHED_STATES]

if (
count_all_records == 0
or integration.state == enums.CognitionMarkdownFileState.FAILED.value
):
return 0.0
integration_progress = round((len(finished_tasks) / count_all_records) * 100.0, 2)

all_tasks = get_all_etl_tasks(integration_id)
finished_tasks = [task for task in all_tasks if task.state in FINISHED_STATES]
count_finished_tasks = len(finished_tasks)

# backward compatibility
if not all_tasks or len(all_tasks) != count_all_records:
all_records, _ = integration_records_bo.get_all_by_integration_id(
integration_id
)
count_finished_tasks += len(
[record for record in all_records if not record.etl_task_id]
)

integration_progress = round((count_finished_tasks / count_all_records) * 100.0, 2)
if integration.state not in FINISHED_STATES:
integration_progress = min(integration_progress - 1, 0)
return integration_progress
Expand Down
11 changes: 8 additions & 3 deletions cognition_objects/markdown_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __get_enriched_query(
category_origin: Optional[str] = None,
query_add: Optional[str] = "",
) -> str:
where_add = " AND (ed.config_ids->>'isDefault')::bool is true"
where_add = ""
if id:
id = prevent_sql_injection(id, isinstance(id, str))
where_add += f" AND md.id = '{id}'"
Expand All @@ -46,7 +46,8 @@ def __get_enriched_query(
md.*,
COALESCE(mf.num_files, 0) AS num_files,
COALESCE(mf.num_reviewed_files, 0) AS num_reviewed_files,
ecp.etl_config
ecp.etl_config,
ecp.id as etl_config_id
FROM cognition.{Tablenames.MARKDOWN_DATASET.value} md
LEFT JOIN (
SELECT dataset_id, COUNT(*) as num_files, COUNT(CASE WHEN is_reviewed = TRUE THEN 1 END) AS num_reviewed_files
Expand All @@ -56,7 +57,7 @@ def __get_enriched_query(
LEFT JOIN(
SELECT md.id, json_array_elements(md.useable_etl_configurations) config_ids
FROM cognition.{Tablenames.MARKDOWN_DATASET.value} md
) ed ON ed.id = md.id
) ed ON ed.id = md.id AND (ed.config_ids->>'isDefault')::bool is true
LEFT JOIN(
SELECT ecp.id, ecp.etl_config
FROM cognition.{Tablenames.ETL_CONFIG_PRESET.value} ecp
Expand Down Expand Up @@ -177,6 +178,7 @@ def update(
dataset_id: str,
name: Optional[str] = None,
description: Optional[str] = None,
useable_etl_configurations: Optional[List[Dict[str, Any]]] = None,
with_commit: bool = True,
) -> CognitionMarkdownDataset:
dataset = get(org_id, dataset_id)
Expand All @@ -187,6 +189,9 @@ def update(
if description:
dataset.description = description

if useable_etl_configurations:
dataset.useable_etl_configurations = useable_etl_configurations

general.flush_or_commit(with_commit)

return dataset
Expand Down
16 changes: 11 additions & 5 deletions cognition_objects/markdown_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,20 @@ def __get_enriched_query(
"etl_task",
"global",
prefix=et_prefix,
include_columns=["is_active", "error_message"],
include_columns=[
"started_at",
"finished_at",
"is_active",
"is_stale",
"llm_ops",
"error_message",
],
)

query = f"""SELECT
{mf_select}, {et_select}, LENGTH({mf_prefix}.content) as content_length,
COALESCE({et_prefix}.state, {mf_prefix}.state) state,
COALESCE({et_prefix}.started_at, {mf_prefix}.started_at) started_at,
COALESCE({et_prefix}.finished_at, {mf_prefix}.finished_at) finished_at
{mf_select}, {et_select}, LENGTH({mf_prefix}.content) AS content_length,
COALESCE({et_prefix}.state, {mf_prefix}.state) AS state,
{et_prefix}.meta_data->>'scope_readable' AS scope_readable
FROM cognition.markdown_file {mf_prefix}
LEFT JOIN global.etl_task {et_prefix} ON {mf_prefix}.etl_task_id = {et_prefix}.id
"""
Expand Down
5 changes: 1 addition & 4 deletions etl_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,17 +301,14 @@ def rm_tree(path: Path):
if item.is_dir():
rm_tree(item)
else:
item.unlink()
item.unlink(missing_ok=True)
path.rmdir()

etl_cache_dir = ETL_DIR / org_id / download_id
if etl_cache_dir.exists() and etl_cache_dir.is_dir():
rm_tree(etl_cache_dir)


# TODO: delete_etl_tasks for related file_reference_id


def get_download_key(org_id: str, download_id: str) -> Path:
return Path(org_id) / download_id / "download"

Expand Down
Loading