Commit d1c4aa1

refactor: bump pylint to work correctly with py3.11 and fix numerous linting issues
BREAKING CHANGE: Numerous typing updates make this codebase unusable on Python versions below 3.9
1 parent 5fa18b1 commit d1c4aa1
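The BREAKING CHANGE is the PEP 585 migration that runs through every file below: the deprecated typing aliases (List, Dict, Set, Tuple, Type) are replaced by builtin generics, and Callable, Iterable, Mapping, and MutableMapping move to collections.abc. Builtin generics in annotations raise TypeError on interpreters older than 3.9 (unless annotations are deferred), hence the new floor. A minimal before/after sketch; build_schema is illustrative, not taken from this codebase:

# Before: typing aliases, the only spelling available on Python < 3.9.
from typing import Dict, List, Optional

def build_schema(fields: List[str], default: Optional[type] = None) -> Dict[str, type]:
    return {fld: default or str for fld in fields}

# After: PEP 585 builtin generics, which only work on Python 3.9+.
from typing import Optional

def build_schema(fields: list[str], default: Optional[type] = None) -> dict[str, type]:
    return {fld: default or str for fld in fields}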

70 files changed: 635 additions & 620 deletions
(Large commits have some content hidden by default; only a subset of the 70 files is shown below.)

Makefile

Lines changed: 9 additions & 0 deletions

@@ -27,6 +27,15 @@ coverage:
 	$(activate) coverage report
 	$(activate) coverage xml
 
+# lint
+pylint:
+	${activate} pylint src/
+
+mypy:
+	${activate} mypy src/
+
+lint: mypy pylint
+
 # pre-commit
 pre-commit-all:
 	${activate} pre-commit run --all-files
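With these targets in place, make lint runs mypy and then pylint over src/, while make mypy and make pylint remain available for running either check on its own.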

pyproject.toml

Lines changed: 5 additions & 6 deletions

@@ -71,9 +71,9 @@ optional = true
 
 [tool.poetry.group.lint.dependencies]
 black = "22.6.0"
-astroid = "2.11.7"
+astroid = "2.14.2"
 isort = "5.11.5"
-pylint = "2.14.5"
+pylint = "2.16.4"
 mypy = "0.982"
 boto3-stubs = {extras = ["essential"], version = "1.26.72"}
 botocore-stubs = "1.29.72"
@@ -126,9 +126,8 @@ source_pkgs = [
 show_missing = true
 
 [tool.pylint]
-# Can't add support for custom checker until running on Python 3.9+ again.
-# init-hook = "import sys; sys.path.append('./pylint_checkers')"
-# load-plugins = "check_typing_imports"
+init-hook = "import sys; sys.path.append('./pylint_checkers')"
+load-plugins = "check_typing_imports"
 
 [tool.pylint.main]
 # Analyse import fallback blocks. This can be used to support both Python 2 and 3
@@ -203,7 +202,7 @@ persistent = true
 
 # Minimum Python version to use for version dependent checks. Will default to the
 # version used to run pylint.
-py-version = "3.7"
+py-version = "3.11"
 
 # Discover python modules and packages in the file system subtree.
 # recursive =
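The re-enabled check_typing_imports plugin itself is not part of this commit. As a rough sketch only, a custom checker of that kind could look like the following; the message id, symbol, and DEPRECATED_TYPING_NAMES set are invented for illustration, while BaseChecker, visit_importfrom, and the register entry point are pylint's standard plugin API.

# pylint_checkers/check_typing_imports.py -- hypothetical sketch, not the real plugin.
from astroid import nodes
from pylint.checkers import BaseChecker
from pylint.lint import PyLinter

# Assumed set of typing names the checker would flag (PEP 585 / collections.abc).
DEPRECATED_TYPING_NAMES = {"List", "Dict", "Set", "Tuple", "Type", "Callable", "Iterable", "Mapping"}

class CheckTypingImports(BaseChecker):
    name = "check_typing_imports"
    msgs = {
        "W9001": (  # message id and symbol are made up for this sketch
            "Import %s from builtins or collections.abc instead of typing",
            "deprecated-typing-import",
            "PEP 585 builtin generics are available from Python 3.9.",
        ),
    }

    def visit_importfrom(self, node: nodes.ImportFrom) -> None:
        # Flag `from typing import List, ...` style imports of deprecated aliases.
        if node.modname != "typing":
            return
        for name, _alias in node.names:
            if name in DEPRECATED_TYPING_NAMES:
                self.add_message("deprecated-typing-import", node=node, args=(name,))

def register(linter: PyLinter) -> None:
    linter.register_checker(CheckTypingImports(linter))

pylint finds the module through the init-hook path addition and loads it via load-plugins, calling register() at startup.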

src/dve/core_engine/backends/base/auditing.py

Lines changed: 34 additions & 40 deletions

@@ -5,6 +5,7 @@
 import threading
 import time
 from abc import abstractmethod
+from collections.abc import Callable, Iterable
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
 from datetime import date, datetime, timedelta
@@ -13,16 +14,9 @@
 from types import TracebackType
 from typing import (
     Any,
-    Callable,
     ClassVar,
-    Dict,
     Generic,
-    Iterable,
-    List,
     Optional,
-    Set,
-    Tuple,
-    Type,
     TypeVar,
     Union,
 )
@@ -55,7 +49,7 @@ class FilterCriteria:
     field: str
     comparison_value: Any
     operator_: Callable = operator.eq
-    operator_mapping: ClassVar[Dict[BinaryComparator, str]] = {
+    operator_mapping: ClassVar[dict[BinaryComparator, str]] = {
         operator.eq: "=",
         operator.ne: "!=",
         operator.lt: "<",
@@ -102,12 +96,12 @@ class BaseAuditor(Generic[AuditReturnType]):
     """Base auditor object - defines structure for implementations to use
     in conjunction with AuditingManager"""
 
-    def __init__(self, name: str, record_type: Type[AuditRecord]):
+    def __init__(self, name: str, record_type: type[AuditRecord]):
         self._name = name
         self._record_type = record_type
 
     @property
-    def schema(self) -> Dict[str, type]:
+    def schema(self) -> dict[str, type]:
         """Determine python schema of auditor"""
         return {
             fld: str if get_origin(mdl.type_) == Literal else mdl.type_
@@ -135,27 +129,27 @@ def conv_to_records(self, recs: AuditReturnType) -> Iterable[AuditRecord]:
         raise NotImplementedError()
 
     @abstractmethod
-    def conv_to_entity(self, recs: List[AuditRecord]) -> AuditReturnType:
+    def conv_to_entity(self, recs: list[AuditRecord]) -> AuditReturnType:
         """Convert the list of pydantic models to an entity for use in pipelines"""
         raise NotImplementedError()
 
     @abstractmethod
-    def add_records(self, records: Iterable[Dict[str, Any]]):
+    def add_records(self, records: Iterable[dict[str, Any]]):
         """Add audit records to the Auditor"""
         raise NotImplementedError()
 
     @abstractmethod
     def retrieve_records(
-        self, filter_criteria: List[FilterCriteria], data: Optional[AuditReturnType] = None
+        self, filter_criteria: list[FilterCriteria], data: Optional[AuditReturnType] = None
     ) -> AuditReturnType:
         """Retrieve audit records from the Auditor"""
         raise NotImplementedError()
 
     def get_most_recent_records(
         self,
-        order_criteria: List[OrderCriteria],
-        partition_fields: Optional[List[str]] = None,
-        pre_filter_criteria: Optional[List[FilterCriteria]] = None,
+        order_criteria: list[OrderCriteria],
+        partition_fields: Optional[list[str]] = None,
+        pre_filter_criteria: Optional[list[FilterCriteria]] = None,
     ) -> AuditReturnType:
         """Retrieve the most recent records, defined by the ordering criteria
         for each partition combination"""
@@ -203,12 +197,12 @@ def combine_auditor_information(
         raise NotImplementedError()
 
     @staticmethod
-    def conv_to_iterable(recs: Union[AuditorType, AuditReturnType]) -> Iterable[Dict[str, Any]]:
+    def conv_to_iterable(recs: Union[AuditorType, AuditReturnType]) -> Iterable[dict[str, Any]]:
         """Convert AuditReturnType to iterable of dictionaries"""
         raise NotImplementedError()
 
     @validate_arguments
-    def add_processing_records(self, processing_records: List[ProcessingStatusRecord]):
+    def add_processing_records(self, processing_records: list[ProcessingStatusRecord]):
         """Add an entry to the processing_status auditor."""
         if self.pool:
             return self._submit(
@@ -220,7 +214,7 @@ def add_processing_records(self, processing_records: List[ProcessingStatusRecord
             )
 
     @validate_arguments
-    def add_submission_statistics_records(self, sub_stats: List[SubmissionStatisticsRecord]):
+    def add_submission_statistics_records(self, sub_stats: list[SubmissionStatisticsRecord]):
         """Add an entry to the submission statistics auditor."""
         if self.pool:
             return self._submit(
@@ -230,7 +224,7 @@ def add_submission_statistics_records(self, sub_stats: List[SubmissionStatistics
         return self._submission_statistics.add_records(records=[dict(rec) for rec in sub_stats])
 
     @validate_arguments
-    def add_transfer_records(self, transfer_records: List[TransferRecord]):
+    def add_transfer_records(self, transfer_records: list[TransferRecord]):
         """Add an entry to the transfers auditor"""
         if self.pool:
             return self._submit(
@@ -241,7 +235,7 @@ def add_transfer_records(self, transfer_records: List[TransferRecord]):
     @validate_arguments
     def add_new_submissions(
         self,
-        submissions: List[SubmissionMetadata],
+        submissions: list[SubmissionMetadata],
         job_run_id: Optional[int] = None,
     ):
         """Add an entry to the submission_info auditor."""
@@ -250,8 +244,8 @@ def add_new_submissions(
         time_now: datetime = datetime.now()
         ts_info = {"time_updated": time_now, "date_updated": time_now.date()}
 
-        processing_status_recs: List[Dict[str, Any]] = []
-        submission_info_recs: List[Dict[str, Any]] = []
+        processing_status_recs: list[dict[str, Any]] = []
+        submission_info_recs: list[dict[str, Any]] = []
 
         for sub_info in submissions:
             # add processing_record - add time info
@@ -311,7 +305,7 @@ def is_writing(self) -> bool:
 
         return not self.queue.empty() or locked
 
-    def mark_transform(self, submission_ids: List[str], **kwargs):
+    def mark_transform(self, submission_ids: list[str], **kwargs):
         """Update submission processing_status to file_transformation."""
 
         recs = [
@@ -323,7 +317,7 @@ def mark_transform(self, submission_ids: List[str], **kwargs):
 
         return self.add_processing_records(recs)
 
-    def mark_data_contract(self, submission_ids: List[str], **kwargs):
+    def mark_data_contract(self, submission_ids: list[str], **kwargs):
         """Update submission processing_status to data_contract."""
 
         recs = [
@@ -335,7 +329,7 @@ def mark_data_contract(self, submission_ids: List[str], **kwargs):
 
         return self.add_processing_records(recs)
 
-    def mark_business_rules(self, submissions: List[Tuple[str, bool]], **kwargs):
+    def mark_business_rules(self, submissions: list[tuple[str, bool]], **kwargs):
         """Update submission processing_status to business_rules."""
 
         recs = [
@@ -352,11 +346,11 @@ def mark_business_rules(self, submissions: List[Tuple[str, bool]], **kwargs):
 
     def mark_error_report(
         self,
-        submissions: List[Tuple[str, SubmissionResult]],
+        submissions: list[tuple[str, SubmissionResult]],
         job_run_id: Optional[int] = None,
     ):
         """Mark the given submission as being ready for error report"""
-        processing_recs: List[ProcessingStatusRecord] = []
+        processing_recs: list[ProcessingStatusRecord] = []
 
         sub_id: str
         sub_result: str
@@ -373,7 +367,7 @@
 
         return self.add_processing_records(processing_recs)
 
-    def mark_finished(self, submissions: List[Tuple[str, SubmissionResult]], **kwargs):
+    def mark_finished(self, submissions: list[tuple[str, SubmissionResult]], **kwargs):
         """Update submission processing_status to finished."""
 
         recs = [
@@ -388,7 +382,7 @@ def mark_finished(self, submissions: List[Tuple[str, SubmissionResult]], **kwarg
 
         return self.add_processing_records(recs)
 
-    def mark_failed(self, submissions: List[str], **kwargs):
+    def mark_failed(self, submissions: list[str], **kwargs):
         """Update submission processing_status to failed."""
         recs = [
             ProcessingStatusRecord(
@@ -399,7 +393,7 @@ def mark_failed(self, submissions: List[str], **kwargs):
 
         return self.add_processing_records(recs)
 
-    def mark_archived(self, submissions: List[str], **kwargs):
+    def mark_archived(self, submissions: list[str], **kwargs):
         """Update submission processing_status to archived."""
         recs = [
             ProcessingStatusRecord(
@@ -410,7 +404,7 @@
 
         return self.add_processing_records(recs)
 
-    def add_feedback_transfer_ids(self, submissions: List[Tuple[str, str]], **kwargs):
+    def add_feedback_transfer_ids(self, submissions: list[tuple[str, str]], **kwargs):
         """Adds transfer_id for error report to submission"""
         recs = [
             TransferRecord(
@@ -425,7 +419,7 @@ def add_feedback_transfer_ids(self, submissions: List[Tuple[str, str]], **kwargs
         return self.add_transfer_records(recs)
 
     def get_latest_processing_records(
-        self, filter_criteria: Optional[List[FilterCriteria]] = None
+        self, filter_criteria: Optional[list[FilterCriteria]] = None
     ) -> AuditReturnType:
         """Get the most recent processing record for each submission_id stored in
         the processing_status auditor"""
@@ -441,18 +435,18 @@ def downstream_pending(
         max_concurrency: int = 1,
         run_number: int = 0,
         max_days_old: int = 3,
-        statuses_to_include: Optional[List[ProcessingStatus]] = None,
+        statuses_to_include: Optional[list[ProcessingStatus]] = None,
     ) -> bool:
         """Checks if there are any downstream submissions currently pending"""
-        steps: List[ProcessingStatus] = [
+        steps: list[ProcessingStatus] = [
             "received",
             "file_transformation",
             "data_contract",
             "business_rules",
             "error_report",
         ]
 
-        downstream: Set[ProcessingStatus]
+        downstream: set[ProcessingStatus]
         if statuses_to_include:
             downstream = {status, *statuses_to_include}
         else:
@@ -519,7 +513,7 @@ def __enter__(self):
 
     def __exit__(
         self,
-        exc_type: Optional[Type[Exception]],
+        exc_type: Optional[type[Exception]],
         exc_value: Optional[Exception],
         traceback: Optional[TracebackType],
     ) -> None:
@@ -532,7 +526,7 @@ def __exit__(
 
     def _get_status(
         self,
-        status: Union[ProcessingStatus, Set[ProcessingStatus], List[ProcessingStatus]],
+        status: Union[ProcessingStatus, set[ProcessingStatus], list[ProcessingStatus]],
         max_days_old: int,
     ) -> AuditReturnType:
         _filter = [
@@ -572,8 +566,8 @@ def get_all_error_report_submissions(self, max_days_old: int = 3):
             self.combine_auditor_information(subs, self._submission_info)
         )
 
-        processed: List[SubmissionInfo] = []
-        dodgy_info: List[Tuple[Dict, str]] = []
+        processed: list[SubmissionInfo] = []
+        dodgy_info: list[tuple[dict, str]] = []
 
         for sub_info in sub_infos:
             try:
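The first hunk above also shows the companion half of the migration: abstract container types now come from collections.abc instead of typing. A small sketch of the resulting idiom; normalise is illustrative, not from auditing.py:

from collections.abc import Callable, Iterable
from typing import Any, Optional

def normalise(
    records: Iterable[dict[str, Any]],
    key: Optional[Callable[[dict[str, Any]], str]] = None,
) -> list[dict[str, Any]]:
    # Accept the loosest abstract type (Iterable) as input and return a
    # concrete builtin (list), mirroring the signatures in the diff above.
    return sorted(records, key=key) if key is not None else list(records)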

src/dve/core_engine/backends/base/backend.py

Lines changed: 9 additions & 8 deletions

@@ -3,7 +3,8 @@
 import logging
 import warnings
 from abc import ABC, abstractmethod
-from typing import Any, ClassVar, Dict, Generic, Mapping, MutableMapping, Optional, Tuple, Type
+from collections.abc import Mapping, MutableMapping
+from typing import Any, ClassVar, Generic, Optional
 
 from pyspark.sql import DataFrame, SparkSession
 
@@ -28,7 +29,7 @@
 class BaseBackend(Generic[EntityType], ABC):
     """A complete implementation of a backend."""
 
-    __entity_type__: ClassVar[Type[EntityType]]  # type: ignore
+    __entity_type__: ClassVar[type[EntityType]]  # type: ignore
     """
     The entity type used within the backend.
 
@@ -45,7 +46,7 @@ def __init__(  # pylint: disable=unused-argument
         self,
         contract: BaseDataContract[EntityType],
         steps: BaseStepImplementations[EntityType],
-        reference_data_loader_type: Optional[Type[BaseRefDataLoader[EntityType]]],
+        reference_data_loader_type: Optional[type[BaseRefDataLoader[EntityType]]],
         logger: Optional[logging.Logger] = None,
         **kwargs: Any,
     ) -> None:
@@ -76,7 +77,7 @@ def __init__(  # pylint: disable=unused-argument
 
     def load_reference_data(
         self,
-        reference_entity_config: Dict[EntityName, ReferenceConfigUnion],
+        reference_entity_config: dict[EntityName, ReferenceConfigUnion],
         submission_info: Optional[SubmissionInfo],
     ) -> Mapping[EntityName, EntityType]:
         """Load the reference data as specified in the reference entity config."""
@@ -115,7 +116,7 @@ def write_entities_to_parquet(
 
     def convert_entities_to_spark(
         self, entities: Entities, cache_prefix: URI, _emit_deprecation_warning: bool = True
-    ) -> Dict[EntityName, DataFrame]:
+    ) -> dict[EntityName, DataFrame]:
         """Convert entities to Spark DataFrames.
 
         Entities may be omitted if they are blank, because Spark cannot create an
@@ -151,7 +152,7 @@ def apply(
         contract_metadata: DataContractMetadata,
         rule_metadata: RuleMetadata,
         submission_info: Optional[SubmissionInfo] = None,
-    ) -> Tuple[Entities, Messages, StageSuccessful]:
+    ) -> tuple[Entities, Messages, StageSuccessful]:
         """Apply the data contract and the rules, returning the entities and all
         generated messages.
 
@@ -184,7 +185,7 @@ def process(
         rule_metadata: RuleMetadata,
         cache_prefix: URI,
         submission_info: Optional[SubmissionInfo] = None,
-    ) -> Tuple[MutableMapping[EntityName, URI], Messages]:
+    ) -> tuple[MutableMapping[EntityName, URI], Messages]:
         """Apply the data contract and the rules, write the entities out to parquet
         and returning the entity locations and all generated messages.
 
@@ -205,7 +206,7 @@ def process_legacy(
         rule_metadata: RuleMetadata,
         cache_prefix: URI,
         submission_info: Optional[SubmissionInfo] = None,
-    ) -> Tuple[MutableMapping[EntityName, DataFrame], Messages]:
+    ) -> tuple[MutableMapping[EntityName, DataFrame], Messages]:
         """Apply the data contract and the rules, create Spark `DataFrame`s from the
         entities and return the Spark entities and all generated messages.