Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 15 additions & 12 deletions dojo/importers/base_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def __init__(
and will raise a `NotImplemented` exception
"""
ImporterOptions.__init__(self, *args, **kwargs)
self.pending_burp_rr: list[BurpRawRequestResponse] = []

def check_child_implementation_exception(self):
"""
Expand Down Expand Up @@ -716,24 +717,26 @@ def process_request_response_pairs(
Create BurpRawRequestResponse objects linked to the finding without
returning the finding afterward
"""
if len(unsaved_req_resp := getattr(finding, "unsaved_req_resp", [])) > 0:
for req_resp in unsaved_req_resp:
burp_rr = BurpRawRequestResponse(
finding=finding,
burpRequestBase64=base64.b64encode(req_resp["req"].encode("utf-8")),
burpResponseBase64=base64.b64encode(req_resp["resp"].encode("utf-8")))
burp_rr.clean()
burp_rr.save()
for req_resp in getattr(finding, "unsaved_req_resp", []):
self.pending_burp_rr.append(BurpRawRequestResponse(
finding=finding,
burpRequestBase64=base64.b64encode(req_resp["req"].encode("utf-8")),
burpResponseBase64=base64.b64encode(req_resp["resp"].encode("utf-8")),
))

unsaved_request = getattr(finding, "unsaved_request", None)
unsaved_response = getattr(finding, "unsaved_response", None)
if unsaved_request is not None and unsaved_response is not None:
burp_rr = BurpRawRequestResponse(
self.pending_burp_rr.append(BurpRawRequestResponse(
finding=finding,
burpRequestBase64=base64.b64encode(unsaved_request.encode()),
burpResponseBase64=base64.b64encode(unsaved_response.encode()))
burp_rr.clean()
burp_rr.save()
burpResponseBase64=base64.b64encode(unsaved_response.encode()),
))

def flush_burp_request_response(self) -> None:
    """
    Write every queued BurpRawRequestResponse to the database and empty the queue.

    Objects accumulated in `self.pending_burp_rr` are inserted with a single
    `bulk_create` call (batch size 1000). When nothing is queued the method
    returns immediately without touching the database.
    """
    queued = self.pending_burp_rr
    if not queued:
        # Nothing accumulated since the last flush
        return
    BurpRawRequestResponse.objects.bulk_create(queued, batch_size=1000)
    # Clear in place so `self.pending_burp_rr` keeps referring to the same list
    queued.clear()

def process_locations(
self,
Expand Down
1 change: 1 addition & 0 deletions dojo/importers/default_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ def _process_findings_internal(
# If batch is full or we're at the end, persist locations/endpoints and dispatch
if len(batch_finding_ids) >= batch_max_size or is_final_finding:
self.location_handler.persist()
self.flush_burp_request_response()
# Apply parser-supplied tags for this batch before post-processing starts,
# so rules/deduplication tasks see the tags already on the findings.
bulk_apply_parser_tags(findings_with_parser_tags)
Expand Down
2 changes: 2 additions & 0 deletions dojo/importers/default_reimporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ def _process_findings_internal(
# They don't need to be aligned since they optimize different operations.
if len(batch_finding_ids) >= dedupe_batch_max_size or is_final:
self.location_handler.persist()
self.flush_burp_request_response()
# Apply parser-supplied tags for this batch before post-processing starts,
# so rules/deduplication tasks see the tags already on the findings.
bulk_apply_parser_tags(findings_with_parser_tags)
Expand Down Expand Up @@ -561,6 +562,7 @@ def close_old_findings(
mitigated_findings.append(finding)
# Persist any accumulated location/endpoint status changes
self.location_handler.persist()
self.flush_burp_request_response()
# Push finding groups to Jira, since we only want to push whole groups.
# We don't check whether the finding's Jira sync is applicable until we get into the loop,
# but this is a way to at least make it that far.
Expand Down
73 changes: 32 additions & 41 deletions unittests/test_importers_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

import logging
from contextlib import contextmanager
from unittest import skip
from unittest.mock import patch

from crum import impersonate
Expand Down Expand Up @@ -275,11 +274,6 @@ def _import_reimport_performance(
self.assertGreater(len_closed_findings4, 0, "Step 4 (empty reimport with close_old_findings=True) should close findings")


@skip("Re-baseline pending: Track B legacy authorization reduces auth-layer query "
"overhead (no per-action role-permission lookups, simpler permission_to_action "
"dispatch). Expected query counts here were calibrated under RBAC and are "
"consistently 1-7 queries higher than legacy actual. Re-baseline with a fresh "
"calibration run after the upstream merge.")
@tag("performance")
@skip_unless_v2
class TestDojoImporterPerformanceSmall(TestDojoImporterPerformanceBase):
Expand Down Expand Up @@ -349,13 +343,13 @@ def test_import_reimport_reimport_performance_pghistory_async(self):
configure_pghistory_triggers()

self._import_reimport_performance(
expected_num_queries1=171,
expected_num_queries1=170,
expected_num_async_tasks1=2,
expected_num_queries2=124,
expected_num_queries2=123,
expected_num_async_tasks2=1,
expected_num_queries3=29,
expected_num_queries3=28,
expected_num_async_tasks3=1,
expected_num_queries4=100,
expected_num_queries4=99,
expected_num_async_tasks4=0,
)

Expand All @@ -373,13 +367,13 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self):
testuser.usercontactinfo.save()

self._import_reimport_performance(
expected_num_queries1=187,
expected_num_queries1=184,
expected_num_async_tasks1=2,
expected_num_queries2=132,
expected_num_queries2=131,
expected_num_async_tasks2=1,
expected_num_queries3=37,
expected_num_queries3=36,
expected_num_async_tasks3=1,
expected_num_queries4=100,
expected_num_queries4=99,
expected_num_async_tasks4=0,
)

Expand All @@ -398,13 +392,13 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr
self.system_settings(enable_product_grade=True)

self._import_reimport_performance(
expected_num_queries1=197,
expected_num_queries1=194,
expected_num_async_tasks1=4,
expected_num_queries2=142,
expected_num_queries2=141,
expected_num_async_tasks2=3,
expected_num_queries3=44,
expected_num_queries3=43,
expected_num_async_tasks3=3,
expected_num_queries4=109,
expected_num_queries4=108,
expected_num_async_tasks4=2,
)

Expand Down Expand Up @@ -530,9 +524,9 @@ def test_deduplication_performance_pghistory_async(self):
self.system_settings(enable_deduplication=True)

self._deduplication_performance(
expected_num_queries1=110,
expected_num_queries1=109,
expected_num_async_tasks1=2,
expected_num_queries2=90,
expected_num_queries2=89,
expected_num_async_tasks2=2,
check_duplicates=False, # Async mode - deduplication happens later
)
Expand All @@ -551,18 +545,15 @@ def test_deduplication_performance_pghistory_no_async(self):
testuser.usercontactinfo.save()

self._deduplication_performance(
expected_num_queries1=126,
expected_num_queries1=123,
expected_num_async_tasks1=2,
expected_num_queries2=107,
expected_num_queries2=104,
expected_num_async_tasks2=2,
)


@tag("performance")
@override_settings(V3_FEATURE_LOCATIONS=True)
@skip("Re-baseline pending: same RBAC→legacy query-count drift as "
"TestDojoImporterPerformanceSmall. See that class's skip note for the "
"rationale.")
class TestDojoImporterPerformanceSmallLocations(TestDojoImporterPerformanceBase):

r"""
Expand Down Expand Up @@ -642,13 +633,13 @@ def test_import_reimport_reimport_performance_pghistory_async(self):
configure_pghistory_triggers()

self._import_reimport_performance(
expected_num_queries1=178,
expected_num_queries1=177,
expected_num_async_tasks1=2,
expected_num_queries2=133,
expected_num_queries2=132,
expected_num_async_tasks2=1,
expected_num_queries3=37,
expected_num_queries3=36,
expected_num_async_tasks3=1,
expected_num_queries4=101,
expected_num_queries4=100,
expected_num_async_tasks4=0,
)

Expand All @@ -666,13 +657,13 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self):
testuser.usercontactinfo.save()

self._import_reimport_performance(
expected_num_queries1=196,
expected_num_queries1=193,
expected_num_async_tasks1=2,
expected_num_queries2=143,
expected_num_queries2=142,
expected_num_async_tasks2=1,
expected_num_queries3=47,
expected_num_queries3=46,
expected_num_async_tasks3=1,
expected_num_queries4=101,
expected_num_queries4=100,
expected_num_async_tasks4=0,
)

Expand All @@ -691,13 +682,13 @@ def test_import_reimport_reimport_performance_pghistory_no_async_with_product_gr
self.system_settings(enable_product_grade=True)

self._import_reimport_performance(
expected_num_queries1=209,
expected_num_queries1=206,
expected_num_async_tasks1=4,
expected_num_queries2=156,
expected_num_queries2=155,
expected_num_async_tasks2=3,
expected_num_queries3=54,
expected_num_queries3=53,
expected_num_async_tasks3=3,
expected_num_queries4=113,
expected_num_queries4=112,
expected_num_async_tasks4=2,
)

Expand Down Expand Up @@ -798,9 +789,9 @@ def test_deduplication_performance_pghistory_async(self):
self.system_settings(enable_deduplication=True)

self._deduplication_performance(
expected_num_queries1=117,
expected_num_queries1=116,
expected_num_async_tasks1=2,
expected_num_queries2=93,
expected_num_queries2=92,
expected_num_async_tasks2=2,
check_duplicates=False, # Async mode - deduplication happens later
)
Expand All @@ -818,8 +809,8 @@ def test_deduplication_performance_pghistory_no_async(self):
testuser.usercontactinfo.save()

self._deduplication_performance(
expected_num_queries1=135,
expected_num_queries1=132,
expected_num_async_tasks1=2,
expected_num_queries2=218,
expected_num_queries2=215,
expected_num_async_tasks2=2,
)
Loading