From b2a5a5c40e4b9a952e2b7d9ed4b7c498e6daa488 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sun, 7 Jun 2026 12:12:00 +0200 Subject: [PATCH] perf(importers): batch BurpRawRequestResponse inserts + re-enable perf tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace per-finding save() calls in process_request_response_pairs with bulk_create at batch boundaries, mirroring the location_handler pattern. Reduces DB round-trips proportionally to findings with req/resp data. Drops the no-op clean() calls (BurpRawRequestResponse has no custom clean). Re-enable TestDojoImporterPerformanceSmall and TestDojoImporterPerformanceSmallLocations with recalibrated query counts after the RBAC→legacy authorization migration. --- dojo/importers/base_importer.py | 27 +++++---- dojo/importers/default_importer.py | 1 + dojo/importers/default_reimporter.py | 2 + unittests/test_importers_performance.py | 73 +++++++++++-------------- 4 files changed, 50 insertions(+), 53 deletions(-) diff --git a/dojo/importers/base_importer.py b/dojo/importers/base_importer.py index d87524185fe..f50f0d458d2 100644 --- a/dojo/importers/base_importer.py +++ b/dojo/importers/base_importer.py @@ -77,6 +77,7 @@ def __init__( and will raise a `NotImplemented` exception """ ImporterOptions.__init__(self, *args, **kwargs) + self.pending_burp_rr: list[BurpRawRequestResponse] = [] def check_child_implementation_exception(self): """ @@ -716,24 +717,26 @@ def process_request_response_pairs( Create BurpRawRequestResponse objects linked to the finding without returning the finding afterward """ - if len(unsaved_req_resp := getattr(finding, "unsaved_req_resp", [])) > 0: - for req_resp in unsaved_req_resp: - burp_rr = BurpRawRequestResponse( - finding=finding, - burpRequestBase64=base64.b64encode(req_resp["req"].encode("utf-8")), - burpResponseBase64=base64.b64encode(req_resp["resp"].encode("utf-8"))) - burp_rr.clean() - burp_rr.save() + for req_resp in getattr(finding, "unsaved_req_resp", []): + self.pending_burp_rr.append(BurpRawRequestResponse( + finding=finding, + burpRequestBase64=base64.b64encode(req_resp["req"].encode("utf-8")), + burpResponseBase64=base64.b64encode(req_resp["resp"].encode("utf-8")), + )) unsaved_request = getattr(finding, "unsaved_request", None) unsaved_response = getattr(finding, "unsaved_response", None) if unsaved_request is not None and unsaved_response is not None: - burp_rr = BurpRawRequestResponse( + self.pending_burp_rr.append(BurpRawRequestResponse( finding=finding, burpRequestBase64=base64.b64encode(unsaved_request.encode()), - burpResponseBase64=base64.b64encode(unsaved_response.encode())) - burp_rr.clean() - burp_rr.save() + burpResponseBase64=base64.b64encode(unsaved_response.encode()), + )) + + def flush_burp_request_response(self) -> None: + if self.pending_burp_rr: + BurpRawRequestResponse.objects.bulk_create(self.pending_burp_rr, batch_size=1000) + self.pending_burp_rr.clear() def process_locations( self, diff --git a/dojo/importers/default_importer.py b/dojo/importers/default_importer.py index 3a920577d2d..4a72c05c343 100644 --- a/dojo/importers/default_importer.py +++ b/dojo/importers/default_importer.py @@ -275,6 +275,7 @@ def _process_findings_internal( # If batch is full or we're at the end, persist locations/endpoints and dispatch if len(batch_finding_ids) >= batch_max_size or is_final_finding: self.location_handler.persist() + self.flush_burp_request_response() # Apply parser-supplied tags for this batch before post-processing starts, # so rules/deduplication tasks see the tags already on the findings. bulk_apply_parser_tags(findings_with_parser_tags) diff --git a/dojo/importers/default_reimporter.py b/dojo/importers/default_reimporter.py index e9c6567107a..4982c4c37b1 100644 --- a/dojo/importers/default_reimporter.py +++ b/dojo/importers/default_reimporter.py @@ -438,6 +438,7 @@ def _process_findings_internal( # They don't need to be aligned since they optimize different operations. if len(batch_finding_ids) >= dedupe_batch_max_size or is_final: self.location_handler.persist() + self.flush_burp_request_response() # Apply parser-supplied tags for this batch before post-processing starts, # so rules/deduplication tasks see the tags already on the findings. bulk_apply_parser_tags(findings_with_parser_tags) @@ -561,6 +562,7 @@ def close_old_findings( mitigated_findings.append(finding) # Persist any accumulated location/endpoint status changes self.location_handler.persist() + self.flush_burp_request_response() # push finding groups to jira since we only only want to push whole groups # We dont check if the finding jira sync is applicable quite yet until we can get in the loop # but this is a way to at least make it that far diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index dc82f28114d..ce9133e4a6b 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -20,7 +20,6 @@ import logging from contextlib import contextmanager -from unittest import skip from unittest.mock import patch from crum import impersonate @@ -275,11 +274,6 @@ def _import_reimport_performance( self.assertGreater(len_closed_findings4, 0, "Step 4 (empty reimport with close_old_findings=True) should close findings") -@skip("Re-baseline pending: Track B legacy authorization reduces auth-layer query " - "overhead (no per-action role-permission lookups, simpler permission_to_action " - "dispatch). Expected query counts here were calibrated under RBAC and are " - "consistently 1-7 queries higher than legacy actual. Re-baseline with a fresh " - "calibration run after the upstream merge.") @tag("performance") @skip_unless_v2 class TestDojoImporterPerformanceSmall(TestDojoImporterPerformanceBase): @@ -349,13 +343,13 @@ def test_import_reimport_reimport_performance_pghistory_async(self): configure_pghistory_triggers() self._import_reimport_performance( - expected_num_queries1=171, + expected_num_queries1=170, expected_num_async_tasks1=2, - expected_num_queries2=124, + expected_num_queries2=123, expected_num_async_tasks2=1, - expected_num_queries3=29, + expected_num_queries3=28, expected_num_async_tasks3=1, - expected_num_queries4=100, + expected_num_queries4=99, expected_num_async_tasks4=0, ) @@ -373,13 +367,13 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self): testuser.usercontactinfo.save() self._import_reimport_performance( - expected_num_queries1=187, + expected_num_queries1=184, expected_num_async_tasks1=2, - expected_num_queries2=132, + expected_num_queries2=131, expected_num_async_tasks2=1, - expected_num_queries3=37, + expected_num_queries3=36, expected_num_async_tasks3=1, - expected_num_queries4=100, + expected_num_queries4=99, expected_num_async_tasks4=0, ) @@ -398,13 +392,13 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr self.system_settings(enable_product_grade=True) self._import_reimport_performance( - expected_num_queries1=197, + expected_num_queries1=194, expected_num_async_tasks1=4, - expected_num_queries2=142, + expected_num_queries2=141, expected_num_async_tasks2=3, - expected_num_queries3=44, + expected_num_queries3=43, expected_num_async_tasks3=3, - expected_num_queries4=109, + expected_num_queries4=108, expected_num_async_tasks4=2, ) @@ -530,9 +524,9 @@ def test_deduplication_performance_pghistory_async(self): self.system_settings(enable_deduplication=True) self._deduplication_performance( - expected_num_queries1=110, + expected_num_queries1=109, expected_num_async_tasks1=2, - expected_num_queries2=90, + expected_num_queries2=89, expected_num_async_tasks2=2, check_duplicates=False, # Async mode - deduplication happens later ) @@ -551,18 +545,15 @@ def test_deduplication_performance_pghistory_no_async(self): testuser.usercontactinfo.save() self._deduplication_performance( - expected_num_queries1=126, + expected_num_queries1=123, expected_num_async_tasks1=2, - expected_num_queries2=107, + expected_num_queries2=104, expected_num_async_tasks2=2, ) @tag("performance") @override_settings(V3_FEATURE_LOCATIONS=True) -@skip("Re-baseline pending: same RBAC→legacy query-count drift as " - "TestDojoImporterPerformanceSmall. See that class's skip note for the " - "rationale.") class TestDojoImporterPerformanceSmallLocations(TestDojoImporterPerformanceBase): r""" @@ -642,13 +633,13 @@ def test_import_reimport_reimport_performance_pghistory_async(self): configure_pghistory_triggers() self._import_reimport_performance( - expected_num_queries1=178, + expected_num_queries1=177, expected_num_async_tasks1=2, - expected_num_queries2=133, + expected_num_queries2=132, expected_num_async_tasks2=1, - expected_num_queries3=37, + expected_num_queries3=36, expected_num_async_tasks3=1, - expected_num_queries4=101, + expected_num_queries4=100, expected_num_async_tasks4=0, ) @@ -666,13 +657,13 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self): testuser.usercontactinfo.save() self._import_reimport_performance( - expected_num_queries1=196, + expected_num_queries1=193, expected_num_async_tasks1=2, - expected_num_queries2=143, + expected_num_queries2=142, expected_num_async_tasks2=1, - expected_num_queries3=47, + expected_num_queries3=46, expected_num_async_tasks3=1, - expected_num_queries4=101, + expected_num_queries4=100, expected_num_async_tasks4=0, ) @@ -691,13 +682,13 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr self.system_settings(enable_product_grade=True) self._import_reimport_performance( - expected_num_queries1=209, + expected_num_queries1=206, expected_num_async_tasks1=4, - expected_num_queries2=156, + expected_num_queries2=155, expected_num_async_tasks2=3, - expected_num_queries3=54, + expected_num_queries3=53, expected_num_async_tasks3=3, - expected_num_queries4=113, + expected_num_queries4=112, expected_num_async_tasks4=2, ) @@ -798,9 +789,9 @@ def test_deduplication_performance_pghistory_async(self): self.system_settings(enable_deduplication=True) self._deduplication_performance( - expected_num_queries1=117, + expected_num_queries1=116, expected_num_async_tasks1=2, - expected_num_queries2=93, + expected_num_queries2=92, expected_num_async_tasks2=2, check_duplicates=False, # Async mode - deduplication happens later ) @@ -818,8 +809,8 @@ def test_deduplication_performance_pghistory_no_async(self): testuser.usercontactinfo.save() self._deduplication_performance( - expected_num_queries1=135, + expected_num_queries1=132, expected_num_async_tasks1=2, - expected_num_queries2=218, + expected_num_queries2=215, expected_num_async_tasks2=2, )