From 07244fd030625ca49260dfba58d86eea0c4b5b0e Mon Sep 17 00:00:00 2001 From: Melissa <65674838+meck-gd@users.noreply.github.com> Date: Sun, 14 Dec 2025 10:57:57 +0100 Subject: [PATCH 1/2] Improve cleaners (#2787) * cleaners: Fix bug where --delete-mongo ignored filters * cleaners: Add --tags-tasks-filter option --- lib/cuckoo/common/cleaners_utils.py | 19 ++++++++++++------- utils/cleaners.py | 11 +++++++---- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/lib/cuckoo/common/cleaners_utils.py b/lib/cuckoo/common/cleaners_utils.py index 14953df9bf6..b7d546e9692 100644 --- a/lib/cuckoo/common/cleaners_utils.py +++ b/lib/cuckoo/common/cleaners_utils.py @@ -56,7 +56,6 @@ mongo_update_one, mongo_update_many, mongo_delete_calls_by_task_id_in_range, - mongo_delete_data_range, ) elif repconf.elasticsearchdb.enabled: from dev_utils.elasticsearchdb import all_docs, delete_analysis_and_related_calls, get_analysis_index @@ -470,9 +469,9 @@ def tmp_clean_before(timerange: str): def cuckoo_clean_before(args: dict): - """Clean up failed tasks + """Clean up old tasks It deletes all stored data from file system and configured databases (SQL - and MongoDB for tasks completed before now - time range. + and optionally MongoDB) for tasks completed before now - time range. """ # Init logging. # This need to init a console logger handler, because the standard @@ -498,7 +497,12 @@ def cuckoo_clean_before(args: dict): log.info("url filter applied") category = "url" - old_tasks = db.list_tasks(added_before=added_before, category=category, not_status=TASK_PENDING) + tags_tasks_like = args.get("tags_tasks_filter", False) + + old_tasks = db.list_tasks(added_before=added_before, + category=category, + not_status=TASK_PENDING, + tags_tasks_like=tags_tasks_like) # We need this to cleanup file system and MongoDB calls collection id_arr = [e.id for e in old_tasks] @@ -535,10 +539,11 @@ def cuckoo_clean_before(args: dict): response = input("You are deleting mongo data in cluster, are you sure you want to continue? y/n") if response.lower() in ("n", "not"): sys.exit() - mongo_delete_data_range(range_end=highest_id) - # cleanup_files_collection_by_id(highest_id) + mongo_delete_data(id_arr) - db.delete_tasks(added_before=added_before, category=category) + db.delete_tasks(added_before=added_before, + category=category, + tags_tasks_like=tags_tasks_like) def cuckoo_clean_sorted_pcap_dump(): diff --git a/utils/cleaners.py b/utils/cleaners.py index 4c927169e5c..fe8c121c051 100644 --- a/utils/cleaners.py +++ b/utils/cleaners.py @@ -30,18 +30,21 @@ parser.add_argument("--pcap-sorted-clean", help="Remove sorted pcap from jobs", action="store_true", required=False) parser.add_argument( "--suricata-zero-alert-filter", - help="only remove events with zero suri alerts DELETE AFTER ONLY", + help="only remove events with zero suri alerts (DELETE-OLDER-THAN ONLY)", action="store_true", required=False, ) parser.add_argument( - "--urls-only-filter", help="only remove url events filter DELETE AFTER ONLY", action="store_true", required=False + "--urls-only-filter", help="only remove url events filter (DELETE-OLDER-THAN ONLY)", action="store_true", required=False ) parser.add_argument( - "--files-only-filter", help="only remove files events filter DELETE AFTER ONLY", action="store_true", required=False + "--files-only-filter", help="only remove files events filter (DELETE-OLDER-THAN ONLY)", action="store_true", required=False ) parser.add_argument( - "--custom-include-filter", help="Only include jobs that match the custom field DELETE AFTER ONLY", required=False + "--custom-include-filter", help="Only include jobs that match the custom field (DELETE-OLDER-THAN ONLY)", required=False + ) + parser.add_argument( + "--tags-tasks-filter", help="Only include jobs whose tags_tasks contains this string (DELETE-OLDER-THAN ONLY)", required=False ) parser.add_argument( "--bson-suri-logs-clean", help="clean bson and suri logs from analysis dirs", required=False, action="store_true" From 74c5a92a2c7be28305e0443ff2349ce2f6c37386 Mon Sep 17 00:00:00 2001 From: doomedraven Date: Sun, 14 Dec 2025 10:58:23 +0100 Subject: [PATCH 2/2] delete-pending argument --- lib/cuckoo/common/cleaners_utils.py | 11 +++++++---- utils/cleaners.py | 3 +++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/cuckoo/common/cleaners_utils.py b/lib/cuckoo/common/cleaners_utils.py index b7d546e9692..d264e73f6e9 100644 --- a/lib/cuckoo/common/cleaners_utils.py +++ b/lib/cuckoo/common/cleaners_utils.py @@ -498,11 +498,14 @@ def cuckoo_clean_before(args: dict): category = "url" tags_tasks_like = args.get("tags_tasks_filter", False) + delete_pending = args.get("delete_pending", False) - old_tasks = db.list_tasks(added_before=added_before, - category=category, - not_status=TASK_PENDING, - tags_tasks_like=tags_tasks_like) + old_tasks = db.list_tasks( + added_before=added_before, + category=category, + not_status=False if delete_pending else TASK_PENDING, + tags_tasks_like=tags_tasks_like + ) # We need this to cleanup file system and MongoDB calls collection id_arr = [e.id for e in old_tasks] diff --git a/utils/cleaners.py b/utils/cleaners.py index fe8c121c051..a8f1d850bd7 100644 --- a/utils/cleaners.py +++ b/utils/cleaners.py @@ -65,6 +65,9 @@ parser.add_argument( "-dm", "--delete-mongo", help="Delete data in mongo. By default keep", required=False, default=False, action="store_true" ) + parser.add_argument( + "-dp", "--delete-pending", help="Delete also pending tasks. By default keep", required=False, default=False, action="store_true" + ) parser.add_argument( "-duf", "--delete-unused-file-data-in-mongo",