diff --git a/lambdas/services/reporting/excel_report_generator_service.py b/lambdas/services/reporting/excel_report_generator_service.py index 40e4a88ef..d02449f48 100644 --- a/lambdas/services/reporting/excel_report_generator_service.py +++ b/lambdas/services/reporting/excel_report_generator_service.py @@ -1,6 +1,9 @@ from datetime import datetime, timezone from openpyxl.workbook import Workbook + +from models.report.bulk_upload_report import BulkUploadReport +from models.report.bulk_upload_report_output import OdsReport from utils.audit_logging_setup import LoggingService logger = LoggingService(__name__) @@ -10,23 +13,64 @@ class ExcelReportGenerator: def create_report_orchestration_xlsx( self, ods_code: str, - records: list[dict], + records: list[BulkUploadReport], output_path: str, ) -> str: logger.info( f"Creating Excel report for ODS code {ods_code} and records {len(records)}", ) + generated_at = datetime.now(timezone.utc) + ods_report = self._build_ods_report(ods_code, records, generated_at) + wb = Workbook() - ws = wb.active + self._create_detail_sheet(wb, ods_code, records, generated_at) + self._create_summary_sheet(wb, ods_report) + + wb.save(output_path) + + logger.info(f"Excel report written successfully for ods code {ods_code}") + return output_path + + @staticmethod + def _build_ods_report( + ods_code: str, + records: list[BulkUploadReport], + generated_at: datetime, + ) -> OdsReport: + return OdsReport( + generated_at=generated_at.strftime("%Y%m%d"), + uploader_ods_code=ods_code, + report_items=records, + ) + + def _create_detail_sheet( + self, + workbook: Workbook, + ods_code: str, + records: list[BulkUploadReport], + generated_at: datetime, + ) -> None: + ws = workbook.active ws.title = "Daily Upload Report" - # Report metadata - ws.append([f"ODS Code: {ods_code}"]) - ws.append([f"Generated at (UTC): {datetime.now(timezone.utc).isoformat()}"]) - ws.append([]) + self._append_detail_sheet_metadata(ws, ods_code, generated_at) + self._append_detail_sheet_headers(ws) + self._append_detail_sheet_rows(ws, records) + + @staticmethod + def _append_detail_sheet_metadata( + worksheet, + ods_code: str, + generated_at: datetime, + ) -> None: + worksheet.append([f"ODS Code: {ods_code}"]) + worksheet.append([f"Generated at (UTC): {generated_at.isoformat()}"]) + worksheet.append([]) - ws.append( + @staticmethod + def _append_detail_sheet_headers(worksheet) -> None: + worksheet.append( [ "NHS Number", "Date", @@ -34,23 +78,112 @@ def create_report_orchestration_xlsx( "PDS ODS", "Upload Status", "Reason", + "Sent to Review", "File Path", ], ) + @staticmethod + def _append_detail_sheet_rows( + worksheet, + records: list[BulkUploadReport], + ) -> None: for record in records: - ws.append( + worksheet.append( [ - record.get("NhsNumber"), - record.get("Date"), - record.get("UploaderOdsCode"), - record.get("PdsOdsCode"), - record.get("UploadStatus"), - record.get("Reason"), - record.get("FilePath"), + record.nhs_number, + record.date, + record.uploader_ods_code, + record.pds_ods_code, + record.upload_status, + record.reason, + record.sent_to_review, + record.file_path, ], ) - wb.save(output_path) - logger.info(f"Excel report written successfully for ods code {ods_code}") - return output_path + def _create_summary_sheet( + self, + workbook: Workbook, + ods_report: OdsReport, + ) -> None: + summary_ws = workbook.create_sheet(title="Summary") + + self._append_summary_headers(summary_ws) + self._append_summary_totals(summary_ws, ods_report) + self._append_summary_optional_totals(summary_ws, ods_report) + self._append_summary_reason_rows(summary_ws, ods_report) + + @staticmethod + def _append_summary_headers(worksheet) -> None: + worksheet.append(["Type", "Description", "Count"]) + + @staticmethod + def _append_summary_totals( + worksheet, + ods_report: OdsReport, + ) -> None: + worksheet.append( + ["Total", "Total Ingested", ods_report.get_total_ingested_count()], + ) + worksheet.append( + ["Total", "Total Successful", ods_report.get_total_successful()], + ) + worksheet.append( + ["Total", "Total In Review", ods_report.get_total_in_review_count()], + ) + worksheet.append( + ["Total", "Review Percentage", ods_report.get_total_in_review_percentage()], + ) + worksheet.append( + [ + "Total", + "Successful Percentage", + ods_report.get_total_successful_percentage(), + ], + ) + worksheet.append( + [ + "Total", + "Successful - Registered Elsewhere", + ods_report.get_total_registered_elsewhere_count(), + ], + ) + worksheet.append( + [ + "Total", + "Successful - Suspended", + ods_report.get_total_suspended_count(), + ], + ) + + @staticmethod + def _append_summary_optional_totals( + worksheet, + ods_report: OdsReport, + ) -> None: + if ods_report.get_total_deceased_count(): + worksheet.append( + [ + "Total", + "Successful - Deceased", + ods_report.get_total_deceased_count(), + ], + ) + + if ods_report.get_total_restricted_count(): + worksheet.append( + [ + "Total", + "Successful - Restricted", + ods_report.get_total_restricted_count(), + ], + ) + + @staticmethod + def _append_summary_reason_rows( + worksheet, + ods_report: OdsReport, + ) -> None: + for row in ods_report.get_unsuccessful_reasons_data_rows(): + worksheet.append(row) diff --git a/lambdas/services/reporting/report_orchestration_service.py b/lambdas/services/reporting/report_orchestration_service.py index b55e949f2..7d9065c83 100644 --- a/lambdas/services/reporting/report_orchestration_service.py +++ b/lambdas/services/reporting/report_orchestration_service.py @@ -1,7 +1,10 @@ import tempfile from collections import defaultdict -from typing import Dict +from typing import Dict, List +from pydantic import ValidationError + +from models.report.bulk_upload_report import BulkUploadReport from repositories.reporting.reporting_dynamo_repository import ReportingDynamoRepository from services.reporting.excel_report_generator_service import ExcelReportGenerator from utils.audit_logging_setup import LoggingService @@ -19,21 +22,38 @@ def process_reporting_window( window_start_ts: int, window_end_ts: int, ) -> Dict[str, str]: - records = self.repository.get_records_for_time_window( + filtered_records = self.repository.get_records_for_time_window( window_start_ts, window_end_ts, ) - if not records: + if not filtered_records: logger.info("No records found for reporting window") return {} - records = [ + filtered_records = [ record - for record in records + for record in filtered_records if "expedite" not in (record.get("FilePath") or "").lower() ] + if not filtered_records: + logger.info( + "No records found for reporting window after excluding expedite files", + ) + return {} + + records: List[BulkUploadReport] = [] + for record in filtered_records: + try: + records.append(BulkUploadReport.model_validate(record)) + except ValidationError as e: + logger.error(f"Skipping invalid record in reporting orchestration: {e}") + + if not records: + logger.info("No valid records after validation") + return {} + records_by_ods = self.group_records_by_ods(records) generated_files: Dict[str, str] = {} @@ -48,14 +68,20 @@ def process_reporting_window( return generated_files @staticmethod - def group_records_by_ods(records: list[dict]) -> dict[str, list[dict]]: + def group_records_by_ods( + records: List[BulkUploadReport], + ) -> dict[str, List[BulkUploadReport]]: grouped = defaultdict(list) for record in records: - ods_code = record.get("UploaderOdsCode") or "UNKNOWN" + ods_code = record.uploader_ods_code or "UNKNOWN" grouped[ods_code].append(record) return grouped - def generate_ods_report(self, ods_code: str, records: list[dict]) -> str: + def generate_ods_report( + self, + ods_code: str, + records: List[BulkUploadReport], + ) -> str: with tempfile.NamedTemporaryFile( suffix=f"_{ods_code}.xlsx", delete=False, diff --git a/lambdas/tests/unit/services/reporting/test_excel_report_generator_service.py b/lambdas/tests/unit/services/reporting/test_excel_report_generator_service.py index 30a46c3ba..6b932dc1e 100644 --- a/lambdas/tests/unit/services/reporting/test_excel_report_generator_service.py +++ b/lambdas/tests/unit/services/reporting/test_excel_report_generator_service.py @@ -1,6 +1,9 @@ +from types import SimpleNamespace + import pytest from freezegun import freeze_time from openpyxl import load_workbook + from services.reporting.excel_report_generator_service import ExcelReportGenerator @@ -10,11 +13,62 @@ def excel_report_generator(): @pytest.fixture -def make_report(tmp_path, excel_report_generator): +def mock_ods_report(mocker): + report = mocker.Mock(name="OdsReport") + report.get_total_ingested_count.return_value = 2 + report.get_total_successful.return_value = 1 + report.get_total_in_review_count.return_value = 1 + report.get_total_in_review_percentage.return_value = "50%" + report.get_total_successful_percentage.return_value = "50%" + report.get_total_registered_elsewhere_count.return_value = 0 + report.get_total_suspended_count.return_value = 0 + report.get_total_deceased_count.return_value = 0 + report.get_total_restricted_count.return_value = 0 + report.get_unsuccessful_reasons_data_rows.return_value = [ + ["Reason", "Invalid NHS number", 1], + ] + return report + + +@pytest.fixture +def make_record(): + def _make( + *, + nhs_number=None, + date=None, + uploader_ods_code=None, + pds_ods_code=None, + upload_status=None, + reason=None, + sent_to_review=None, + file_path=None, + ): + return SimpleNamespace( + nhs_number=nhs_number, + date=date, + uploader_ods_code=uploader_ods_code, + pds_ods_code=pds_ods_code, + upload_status=upload_status, + reason=reason, + sent_to_review=sent_to_review, + file_path=file_path, + ) + + return _make + + +@pytest.fixture +def make_report(tmp_path, excel_report_generator, mock_ods_report, mocker): def _make(*, ods_code="Y12345", records=None, filename="report.xlsx"): records = records or [] output_file = tmp_path / filename + mocked_build = mocker.patch.object( + excel_report_generator, + "_build_ods_report", + return_value=mock_ods_report, + ) + result = excel_report_generator.create_report_orchestration_xlsx( ods_code=ods_code, records=records, @@ -25,8 +79,12 @@ def _make(*, ods_code="Y12345", records=None, filename="report.xlsx"): assert output_file.exists() wb = load_workbook(output_file) - ws = wb.active - return output_file, ws + detail_ws = wb["Daily Upload Report"] + summary_ws = wb["Summary"] + + mocked_build.assert_called_once() + + return output_file, detail_ws, summary_ws return _make @@ -40,37 +98,46 @@ def expected_header_row(): "PDS ODS", "Upload Status", "Reason", + "Sent to Review", "File Path", ] @freeze_time("2025-01-01T12:00:00") -def test_create_report_orchestration_xlsx_happy_path(make_report, expected_header_row): +def test_create_report_orchestration_xlsx_happy_path( + make_report, + make_record, + expected_header_row, +): ods_code = "Y12345" records = [ - { - "ID": 1, - "Date": "2025-01-01", - "NhsNumber": "1234567890", - "UploaderOdsCode": "Y12345", - "PdsOdsCode": "A99999", - "UploadStatus": "SUCCESS", - "Reason": "", - "FilePath": "/path/file1.pdf", - }, - { - "ID": 2, - "Date": "2025-01-02", - "NhsNumber": "123456789", - "UploaderOdsCode": "Y12345", - "PdsOdsCode": "B88888", - "UploadStatus": "FAILED", - "Reason": "Invalid NHS number", - "FilePath": "/path/file2.pdf", - }, + make_record( + nhs_number="1234567890", + date="2025-01-01", + uploader_ods_code="Y12345", + pds_ods_code="A99999", + upload_status="SUCCESS", + reason=None, + sent_to_review=False, + file_path="/path/file1.pdf", + ), + make_record( + nhs_number="123456789", + date="2025-01-02", + uploader_ods_code="Y12345", + pds_ods_code="B88888", + upload_status="FAILED", + reason="Invalid NHS number", + sent_to_review=True, + file_path="/path/file2.pdf", + ), ] - _, ws = make_report(ods_code=ods_code, records=records, filename="report.xlsx") + _, ws, summary_ws = make_report( + ods_code=ods_code, + records=records, + filename="report.xlsx", + ) assert ws.title == "Daily Upload Report" assert ws["A1"].value == f"ODS Code: {ods_code}" @@ -86,6 +153,7 @@ def test_create_report_orchestration_xlsx_happy_path(make_report, expected_heade "A99999", "SUCCESS", None, + False, "/path/file1.pdf", ] @@ -96,34 +164,72 @@ def test_create_report_orchestration_xlsx_happy_path(make_report, expected_heade "B88888", "FAILED", "Invalid NHS number", + True, "/path/file2.pdf", ] + assert summary_ws.title == "Summary" + assert [cell.value for cell in summary_ws[1]] == ["Type", "Description", "Count"] + def test_create_report_orchestration_xlsx_with_no_records( make_report, expected_header_row, ): - _, ws = make_report(records=[], filename="empty_report.xlsx") + _, ws, summary_ws = make_report(records=[], filename="empty_report.xlsx") assert ws.max_row == 4 assert [cell.value for cell in ws[4]] == expected_header_row + assert [cell.value for cell in summary_ws[1]] == ["Type", "Description", "Count"] @pytest.mark.parametrize( "records, expected_row", [ ( - [{"ID": 1, "NhsNumber": "1234567890"}], - ["1234567890", None, None, None, None, None, None], + [ + SimpleNamespace( + nhs_number="1234567890", + date=None, + uploader_ods_code=None, + pds_ods_code=None, + upload_status=None, + reason=None, + sent_to_review=None, + file_path=None, + ), + ], + ["1234567890", None, None, None, None, None, None, None], ), ( - [{"Date": "2025-01-01"}], - [None, "2025-01-01", None, None, None, None, None], + [ + SimpleNamespace( + nhs_number=None, + date="2025-01-01", + uploader_ods_code=None, + pds_ods_code=None, + upload_status=None, + reason=None, + sent_to_review=None, + file_path=None, + ), + ], + [None, "2025-01-01", None, None, None, None, None, None], ), ( - [{"UploaderOdsCode": "Y12345", "UploadStatus": "SUCCESS"}], - [None, None, "Y12345", None, "SUCCESS", None, None], + [ + SimpleNamespace( + nhs_number=None, + date=None, + uploader_ods_code="Y12345", + pds_ods_code=None, + upload_status="SUCCESS", + reason=None, + sent_to_review=None, + file_path=None, + ), + ], + [None, None, "Y12345", None, "SUCCESS", None, None, None], ), ], ) @@ -133,7 +239,7 @@ def test_create_report_orchestration_xlsx_handles_missing_fields( records, expected_row, ): - _, ws = make_report(records=records, filename="partial.xlsx") + _, ws, _summary_ws = make_report(records=records, filename="partial.xlsx") assert [cell.value for cell in ws[4]] == expected_header_row assert [cell.value for cell in ws[5]] == expected_row diff --git a/lambdas/tests/unit/services/reporting/test_report_orchestration_service.py b/lambdas/tests/unit/services/reporting/test_report_orchestration_service.py index cabd3c45c..1028f4534 100644 --- a/lambdas/tests/unit/services/reporting/test_report_orchestration_service.py +++ b/lambdas/tests/unit/services/reporting/test_report_orchestration_service.py @@ -1,7 +1,18 @@ +from types import SimpleNamespace + import pytest +from pydantic import ValidationError + from services.reporting.report_orchestration_service import ReportOrchestrationService +def make_validated_record(uploader_ods_code, record_id): + return SimpleNamespace( + uploader_ods_code=uploader_ods_code, + id=str(record_id), + ) + + @pytest.fixture def mock_repository(mocker): repo = mocker.Mock(name="ReportingDynamoRepositoryInstance") @@ -40,56 +51,64 @@ def mocked_generate(report_orchestration_service, mocker): @pytest.mark.parametrize( - "records, expected_generate_calls, expected_result", + "raw_records, validated_records, expected_generate_calls, expected_result", [ ( + [], [], [], {}, ), ( - [{"UploaderOdsCode": "X1", "ID": 1}], + [{"UploaderOdsCode": "X1", "ID": "1"}], + [make_validated_record("X1", "1")], [ - ("X1", [{"UploaderOdsCode": "X1", "ID": 1}]), + ("X1", [make_validated_record("X1", "1")]), ], {"X1": "/tmp/X1.xlsx"}, ), ( [ - {"UploaderOdsCode": "Y12345", "ID": 1}, - {"UploaderOdsCode": "Y12345", "ID": 2}, - {"UploaderOdsCode": "A99999", "ID": 3}, + {"UploaderOdsCode": "Y12345", "ID": "1"}, + {"UploaderOdsCode": "Y12345", "ID": "2"}, + {"UploaderOdsCode": "A99999", "ID": "3"}, + ], + [ + make_validated_record("Y12345", "1"), + make_validated_record("Y12345", "2"), + make_validated_record("A99999", "3"), ], [ ( "Y12345", [ - {"UploaderOdsCode": "Y12345", "ID": 1}, - {"UploaderOdsCode": "Y12345", "ID": 2}, + make_validated_record("Y12345", "1"), + make_validated_record("Y12345", "2"), ], ), - ("A99999", [{"UploaderOdsCode": "A99999", "ID": 3}]), + ("A99999", [make_validated_record("A99999", "3")]), ], {"Y12345": "/tmp/Y12345.xlsx", "A99999": "/tmp/A99999.xlsx"}, ), ( [ - {"UploaderOdsCode": "Y12345", "ID": 1}, - {"ID": 2}, - {"UploaderOdsCode": None, "ID": 3}, + {"UploaderOdsCode": "Y12345", "ID": "1"}, + {"UploaderOdsCode": None, "ID": "2"}, + {"UploaderOdsCode": "", "ID": "3"}, ], [ - ("Y12345", [{"UploaderOdsCode": "Y12345", "ID": 1}]), - ("UNKNOWN", [{"ID": 2}, {"UploaderOdsCode": None, "ID": 3}]), + make_validated_record("Y12345", "1"), + make_validated_record(None, "2"), + make_validated_record("", "3"), ], - {"Y12345": "/tmp/Y12345.xlsx", "UNKNOWN": "/tmp/UNKNOWN.xlsx"}, - ), - ( - [{"UploaderOdsCode": "", "ID": 1}], [ - ("UNKNOWN", [{"UploaderOdsCode": "", "ID": 1}]), + ("Y12345", [make_validated_record("Y12345", "1")]), + ( + "UNKNOWN", + [make_validated_record(None, "2"), make_validated_record("", "3")], + ), ], - {"UNKNOWN": "/tmp/UNKNOWN.xlsx"}, + {"Y12345": "/tmp/Y12345.xlsx", "UNKNOWN": "/tmp/UNKNOWN.xlsx"}, ), ], ) @@ -97,11 +116,19 @@ def test_process_reporting_window_behaviour( report_orchestration_service, mock_repository, mocked_generate, - records, + mocker, + raw_records, + validated_records, expected_generate_calls, expected_result, ): - mock_repository.get_records_for_time_window.return_value = records + mock_repository.get_records_for_time_window.return_value = raw_records + + validated_iter = iter(validated_records) + mocker.patch( + "services.reporting.report_orchestration_service.BulkUploadReport.model_validate", + side_effect=lambda _record: next(validated_iter), + ) result = report_orchestration_service.process_reporting_window(100, 200) @@ -114,35 +141,96 @@ def test_process_reporting_window_behaviour( assert result == expected_result +def test_process_reporting_window_skips_invalid_records( + report_orchestration_service, + mock_repository, + mocked_generate, + mocker, +): + raw_records = [ + {"UploaderOdsCode": "Y12345", "ID": "1"}, + {"UploaderOdsCode": "A99999", "ID": "2"}, + ] + mock_repository.get_records_for_time_window.return_value = raw_records + + valid_record = make_validated_record("Y12345", "1") + + mocker.patch( + "services.reporting.report_orchestration_service.BulkUploadReport.model_validate", + side_effect=[ + valid_record, + ValidationError.from_exception_data( + "BulkUploadReport", + [], + ), + ], + ) + + result = report_orchestration_service.process_reporting_window(100, 200) + + mocked_generate.assert_called_once_with("Y12345", [valid_record]) + assert result == {"Y12345": "/tmp/Y12345.xlsx"} + + +def test_process_reporting_window_returns_empty_when_all_records_invalid( + report_orchestration_service, + mock_repository, + mocked_generate, + mocker, +): + raw_records = [ + {"UploaderOdsCode": "Y12345", "ID": "1"}, + {"UploaderOdsCode": "A99999", "ID": "2"}, + ] + mock_repository.get_records_for_time_window.return_value = raw_records + + mocker.patch( + "services.reporting.report_orchestration_service.BulkUploadReport.model_validate", + side_effect=[ + ValidationError.from_exception_data("BulkUploadReport", []), + ValidationError.from_exception_data("BulkUploadReport", []), + ], + ) + + result = report_orchestration_service.process_reporting_window(100, 200) + + mocked_generate.assert_not_called() + assert result == {} + + @pytest.mark.parametrize( "records, expected", [ ( [ - {"UploaderOdsCode": "Y12345", "ID": 1}, - {"UploaderOdsCode": "Y12345", "ID": 2}, - {"UploaderOdsCode": "A99999", "ID": 3}, - {"ID": 4}, - {"UploaderOdsCode": None, "ID": 5}, + make_validated_record("Y12345", "1"), + make_validated_record("Y12345", "2"), + make_validated_record("A99999", "3"), + make_validated_record(None, "4"), + make_validated_record("", "5"), ], { "Y12345": [ - {"UploaderOdsCode": "Y12345", "ID": 1}, - {"UploaderOdsCode": "Y12345", "ID": 2}, + make_validated_record("Y12345", "1"), + make_validated_record("Y12345", "2"), + ], + "A99999": [make_validated_record("A99999", "3")], + "UNKNOWN": [ + make_validated_record(None, "4"), + make_validated_record("", "5"), ], - "A99999": [{"UploaderOdsCode": "A99999", "ID": 3}], - "UNKNOWN": [{"ID": 4}, {"UploaderOdsCode": None, "ID": 5}], }, ), ([], {}), ( - [{"UploaderOdsCode": "", "ID": 1}], - {"UNKNOWN": [{"UploaderOdsCode": "", "ID": 1}]}, + [make_validated_record("", "1")], + {"UNKNOWN": [make_validated_record("", "1")]}, ), ], ) def test_group_records_by_ods(records, expected): result = ReportOrchestrationService.group_records_by_ods(records) + assert dict(result) == expected @@ -166,7 +254,7 @@ def test_generate_ods_report_creates_excel_report_and_returns_path( fake_named_tmpfile, ): mocked_ntf, fake_tmp = fake_named_tmpfile - records = [{"ID": 1, "UploaderOdsCode": "Y12345"}] + records = [make_validated_record("Y12345", "1")] result_path = report_orchestration_service.generate_ods_report("Y12345", records)