Skip to content

Commit 6d08175

Browse files
Merge pull request #1119 from NHSDigital/NRL-1884-seed-script-generates-input-files
NRL-1884 seed script generates perf test input files
2 parents dc4f477 + 5cd1939 commit 6d08175

File tree

10 files changed

+155
-447
lines changed

10 files changed

+155
-447
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,4 @@ allure-report/*
7979

8080
# Performance test ref data
8181
tests/performance/reference-data.json
82+
tests/performance/producer/expanded_pointer_distributions.json

Makefile

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ HOST ?= $(TF_WORKSPACE_NAME).api.record-locator.$(ENV).national.nhs.uk
1717
ENV_TYPE ?= $(ENV)
1818
PERFTEST_TABLE_NAME ?= perftest
1919
PERFTEST_HOST ?= perftest-1.perftest.record-locator.national.nhs.uk
20+
PERFTEST_PATIENTS_WITH_POINTERS ?= 0
21+
PERFTEST_POINTERS_PER_PATIENT ?= 0
22+
PERFTEST_TYPE_DIST_PROFILE ?= default
23+
PERFTEST_CUSTODIAN_DIST_PROFILE ?= default
2024

2125
export PATH := $(PATH):$(PWD)/.venv/bin
2226
export USE_SHARED_RESOURCES := $(shell poetry run python scripts/are_resources_shared_for_stack.py $(TF_WORKSPACE_NAME))
@@ -249,31 +253,39 @@ generate-models: check-warn ## Generate Pydantic Models
249253
--output-model-type "pydantic_v2.BaseModel"
250254

251255

252-
generate-perftest-permissions: ## Generate perftest permissions and add to nrlf_permissions
253-
poetry run python tests/performance/producer/generate_permissions.py --output_dir="$(DIST_PATH)/nrlf_permissions/K6PerformanceTest"
256+
perftest-generate-permissions: ## Generate perftest permissions and add to nrlf_permissions
257+
PYTHONPATH=. poetry run python tests/performance/producer/generate_permissions.py --output_dir="$(DIST_PATH)/nrlf_permissions/K6PerformanceTest"
254258

255-
perftest-producer:
259+
perftest-seed-tables: ## Seed tables and upload generated perftest input files to s3
260+
@echo "Seeding performance test pointer tables with ENV=$(ENV) and PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and PERFTEST_PATIENTS_WITH_POINTERS=$(PERFTEST_PATIENTS_WITH_POINTERS) and PERFTEST_POINTERS_PER_PATIENT=$(PERFTEST_POINTERS_PER_PATIENT) and PERFTEST_TYPE_DIST_PROFILE=$(PERFTEST_TYPE_DIST_PROFILE) and PERFTEST_CUSTODIAN_DIST_PROFILE=$(PERFTEST_CUSTODIAN_DIST_PROFILE)"
261+
rm -rf "${DIST_PATH}/nft"
262+
mkdir -p "${DIST_PATH}/nft"
263+
PYTHONPATH=. poetry run python ./scripts/seed_nft_tables.py --table_name=$(PERFTEST_TABLE_NAME) --patients_with_pointers=$(PERFTEST_PATIENTS_WITH_POINTERS) --pointers_per_patient=$(PERFTEST_POINTERS_PER_PATIENT) --type_dist_profile=$(PERFTEST_TYPE_DIST_PROFILE) --custodian_dist_profile=$(PERFTEST_CUSTODIAN_DIST_PROFILE)
264+
zip -r "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "${DIST_PATH}/nft"
265+
aws s3 cp "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "s3://nhsd-nrlf--${ENV}-metadata/performance/seed-pointers-extract-${PERFTEST_TABLE_NAME}.zip"
266+
267+
perftest-prepare: ## Prepare input files for producer & consumer perf tests
268+
@echo "Preparing performance tests with ENV=$(ENV) and PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)"
269+
rm -rf "${DIST_PATH}/nft"
270+
mkdir -p "${DIST_PATH}/nft"
271+
aws s3 cp "s3://nhsd-nrlf--${ENV}-metadata/performance/seed-pointers-extract-${PERFTEST_TABLE_NAME}.zip" "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip"
272+
unzip "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip"
273+
# cp "${DIST_PATH}/nft/seed-pointers-extract-${PERFTEST_TABLE_NAME}.csv" "${DIST_PATH}/seed-pointers-extract.csv"
274+
PYTHONPATH=. poetry run python ./tests/performance/generate_producer_distributions.py
275+
276+
perftest-producer: ## Run producer perf tests
256277
@echo "Running producer performance tests with HOST=$(PERFTEST_HOST) and ENV_TYPE=$(ENV_TYPE) and DIST_PATH=$(DIST_PATH)"
257278
k6 run tests/performance/producer/perftest.js -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
258279

259-
perftest-consumer:
280+
perftest-consumer: ## Run consumer perf tests
260281
@echo "Running consumer performance tests with HOST=$(PERFTEST_HOST) and ENV_TYPE=$(ENV_TYPE) and DIST_PATH=$(DIST_PATH)"
261282
k6 run tests/performance/consumer/perftest.js -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
262283

263-
perftest-prep-generate-producer-data:
264-
@echo "Generating producer reference with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)"
265-
mkdir -p $(DIST_PATH)
266-
PYTHONPATH=. poetry run python tests/performance/perftest_environment.py generate_producer_data --output_dir="$(DIST_PATH)"
267-
268-
perftest-prep-extract-consumer-data:
269-
@echo "Generating consumer reference with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)"
270-
mkdir -p $(DIST_PATH)
271-
PYTHONPATH=. poetry run python tests/performance/perftest_environment.py extract_consumer_data --output_dir="$(DIST_PATH)"
272-
273-
perftest-prep-generate-pointer-table-extract:
284+
perftest-generate-pointer-table-extract: ## Refresh the perf test input files in s3. Can be expensive to run on large tables
274285
@echo "Generating pointer table extract with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)"
275-
mkdir -p $(DIST_PATH)
276-
PYTHONPATH=. poetry run python tests/performance/perftest_environment.py generate_pointer_table_extract --output_dir="$(DIST_PATH)"
277-
278-
perftest-prepare: perftest-prep-generate-producer-data perftest-prep-extract-consumer-data perftest-prep-generate-pointer-table-extract
279-
@echo "Prepared performance tests with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)"
286+
rm -rf "${DIST_PATH}/nft"
287+
mkdir -p "${DIST_PATH}/nft"
288+
PYTHONPATH=. poetry run python tests/performance/perftest_environment.py generate_pointer_table_extract --output_dir="${DIST_PATH}/nft"
289+
./scripts/get-current-info.sh > "${DIST_PATH}/nft/info.json"
290+
zip -r "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "${DIST_PATH}/nft"
291+
aws s3 cp "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "s3://nhsd-nrlf--${ENV}-metadata/performance/seed-pointers-extract-${PERFTEST_TABLE_NAME}.zip"

scripts/seed_nft_tables.py

Lines changed: 53 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import csv
2+
import os
23
from datetime import datetime, timedelta, timezone
34
from itertools import cycle
45
from math import gcd
@@ -7,10 +8,9 @@
78

89
import boto3
910
import fire
10-
11-
# import json
1211
import numpy as np
1312

13+
from nrlf.core.boto import get_s3_client
1414
from nrlf.core.constants import (
1515
CATEGORY_ATTRIBUTES,
1616
SNOMED_SYSTEM_URL,
@@ -20,12 +20,16 @@
2020
from nrlf.core.dynamodb.model import DocumentPointer
2121
from nrlf.core.logger import logger
2222
from nrlf.tests.data import load_document_reference
23+
from tests.performance.perftest_environment import create_extract_metadata_file
2324
from tests.performance.seed_data_constants import ( # DEFAULT_COUNT_DISTRIBUTIONS,
2425
CHECKSUM_WEIGHTS,
2526
CUSTODIAN_DISTRIBUTION_PROFILES,
2627
TYPE_DISTRIBUTION_PROFILES,
2728
)
2829

30+
dist_path = os.getenv("DIST_PATH", "./dist")
31+
nft_dist_path = f"{dist_path}/nft"
32+
2933
dynamodb = boto3.client("dynamodb")
3034
resource = boto3.resource("dynamodb")
3135

@@ -83,35 +87,56 @@ def _make_seed_pointer(
8387
return nft_pointer
8488

8589

90+
def _write_pointer_extract_to_file(table_name, pointer_data):
91+
local_csv_out = f"{nft_dist_path}/seed-pointers-extract.csv"
92+
local_meta_out = f"{nft_dist_path}/info.json"
93+
94+
print(f"writing pointer extract to files {local_csv_out} {local_meta_out}")
95+
96+
with open(local_csv_out, "w") as file:
97+
writer = csv.writer(file)
98+
writer.writerow(["pointer_id", "pointer_type", "custodian", "nhs_number"])
99+
writer.writerows(pointer_data)
100+
print(f"Pointer data saved to {local_csv_out}")
101+
102+
create_extract_metadata_file(table_name, nft_dist_path)
103+
104+
86105
def _populate_seed_table(
87106
table_name: str,
88-
px_with_pointers: int,
89-
pointers_per_px: float = 1.0,
107+
patients_with_pointers: int,
108+
pointers_per_patient: float = 1.0,
90109
type_dist_profile: str = "default",
91110
custodian_dist_profile: str = "default",
92111
):
93112
"""
94113
Seeds a table with example data for non-functional testing.
95114
"""
96-
if pointers_per_px < 1.0:
115+
if pointers_per_patient < 1.0:
97116
raise ValueError("Cannot populate table with patients with zero pointers")
98117

118+
print(
119+
f"Populating table {table_name} with patients_with_pointers={patients_with_pointers} pointers_per_patient={pointers_per_patient}",
120+
type_dist_profile,
121+
custodian_dist_profile,
122+
)
123+
99124
type_dists = TYPE_DISTRIBUTION_PROFILES[type_dist_profile]
100125
custodian_dists = CUSTODIAN_DISTRIBUTION_PROFILES[custodian_dist_profile]
101126

102127
# set up iterations
103128
type_iter = _set_up_cyclical_iterator(type_dists)
104129
custodian_iters = _set_up_custodian_iterators(custodian_dists)
105130
count_iter = _get_pointer_count_poisson_distributions(
106-
px_with_pointers, pointers_per_px
131+
patients_with_pointers, pointers_per_patient
107132
)
108133
testnum_cls = TestNhsNumbersIterator()
109134
testnum_iter = iter(testnum_cls)
110135

111-
px_counter = 0
112-
doc_ref_target = int(pointers_per_px * px_with_pointers)
136+
patient_counter = 0
137+
doc_ref_target = int(pointers_per_patient * patients_with_pointers)
113138
print(
114-
f"Will upsert ~{doc_ref_target} test pointers for {px_with_pointers} patients."
139+
f"Will upsert ~{doc_ref_target} test pointers for {patients_with_pointers} patients."
115140
)
116141
doc_ref_counter = 0
117142
batch_counter = 0
@@ -120,12 +145,15 @@ def _populate_seed_table(
120145
pointer_data: list[list[str]] = []
121146

122147
start_time = datetime.now(tz=timezone.utc)
123-
124148
batch_upsert_items: list[dict[str, Any]] = []
125-
while px_counter < px_with_pointers:
126-
pointers_for_px = int(next(count_iter))
127149

128-
if batch_counter + pointers_for_px > 25 or px_counter == px_with_pointers:
150+
while patient_counter <= patients_with_pointers:
151+
pointers_for_patient = int(next(count_iter))
152+
153+
if (
154+
batch_counter + pointers_for_patient > 25
155+
or patient_counter == patients_with_pointers
156+
):
129157
response = resource.batch_write_item(
130158
RequestItems={table_name: batch_upsert_items}
131159
)
@@ -138,45 +166,43 @@ def _populate_seed_table(
138166
batch_upsert_items = []
139167
batch_counter = 0
140168

141-
new_px = next(testnum_iter)
142-
for _ in range(pointers_for_px):
169+
new_patient = next(testnum_iter)
170+
for _ in range(pointers_for_patient):
143171
new_type = next(type_iter)
144172
new_custodian = next(custodian_iters[new_type])
145173
doc_ref_counter += 1
146174
batch_counter += 1
147175

148176
pointer = _make_seed_pointer(
149-
new_type, new_custodian, new_px, doc_ref_counter
177+
new_type, new_custodian, new_patient, doc_ref_counter
150178
)
151179
put_req = {"PutRequest": {"Item": pointer.model_dump()}}
152180
batch_upsert_items.append(put_req)
153181
pointer_data.append(
154182
[
155183
pointer.id,
156-
pointer.type,
184+
new_type, # not full type url
157185
pointer.custodian,
158186
pointer.nhs_number,
159187
]
160188
)
161-
px_counter += 1
189+
patient_counter += 1
162190

163-
if px_counter % 1000 == 0:
191+
if patient_counter % 1000 == 0:
164192
print(".", end="", flush=True)
165-
if px_counter % 100000 == 0:
166-
print(f" {px_counter} patients processed ({doc_ref_counter} pointers).")
193+
if patient_counter % 100000 == 0:
194+
print(
195+
f" {patient_counter} patients processed ({doc_ref_counter} pointers)."
196+
)
167197

168-
print(" Done.")
198+
print("Done")
169199

170200
end_time = datetime.now(tz=timezone.utc)
171201
print(
172202
f"Created {doc_ref_counter} pointers in {timedelta.total_seconds(end_time - start_time)} seconds (unprocessed: {unprocessed_count})."
173203
)
174204

175-
with open("./dist/seed-nft-pointers.csv", "w") as f:
176-
writer = csv.writer(f)
177-
writer.writerow(["pointer_id", "pointer_type", "custodian", "nhs_number"])
178-
writer.writerows(pointer_data)
179-
print(f"Pointer data saved to ./dist/seed-nft-pointers.csv") # noqa
205+
_write_pointer_extract_to_file(table_name, pointer_data)
180206

181207

182208
def _set_up_cyclical_iterator(dists: dict[str, int]) -> Iterator[str]:

tests/performance/README.md

Lines changed: 37 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@ We have performance tests which give us a benchmark of how NRLF performs under l
88

99
Perf tests are generally conducted in the perftest env. There's a selection of tables in the perftest env representing different pointer volume scenarios e.g. perftest-baseline vs perftest-1million (todo: update with real names!).
1010

11+
#### Pull certs for perftest
12+
13+
```sh
14+
assume nhsd-nrlf-mgmt
15+
make truststore-pull-all ENV=perftest
16+
```
17+
1118
#### Point perftest at a different pointers table
1219

1320
We (will) have multiple tables representing different states of NRLF in the future e.g. all patients receiving an IPS (International Patient Summary), onboarding particular high-volume suppliers.
@@ -29,44 +36,37 @@ Currently, this requires tearing down the existing environment and restoring fro
2936
2. once backed up, delete your table. In the AWS console: dynamodb > tables > your perftest table > actions > delete table
3037
3. Rerun the Deploy Account-wide infrastructure action.
3138
4. Terraform will create an empty table with the correct name & (most importantly!) read/write IAM policies.
32-
5. Delete the empty table created by terraform and restore from the backup, specifying the same table name you've defined in code.
39+
5. Delete the empty table created by terraform and restore from the backup, specifying the same table name you've defined in code & selecting the matching customer-managed encryption key.
3340
6. Run the [Persistent Environment Deploy](https://github.com/NHSDigital/NRLF/actions/workflows/persistent-environment.yml) workflow against your branch & `perftest` to restore the environment with lambdas pointed at your chosen table.
3441
7. You can check this has been successful by checking the table name in the lambdas.
3542
- In the AWS console: Lambda > functions > pick any perftest-1 lambda > Configuration > Environment variables > `TABLE_NAME` should be your desired pointer table e.g. `nhsd-nrlf--perftest-baseline-pointers-table`
3643

3744
If you've followed these steps, you will also need to [generate permissions](#generate-permissions) as the organisation permissions will have been lost when the environment was torn down.
3845

39-
### Prepare to run tests
40-
41-
#### Pull certs for perftest
42-
43-
```sh
44-
assume management
45-
make truststore-pull-all ENV=perftest
46-
```
47-
4846
#### Generate permissions
4947

5048
You will need to generate pointer permissions the first time performance tests are run in an environment e.g. if the perftest environment is destroyed & recreated.
5149

5250
```sh
5351
# In project root
54-
make generate permissions # makes a bunch of json permission files for test organisations
52+
make perftest-generate-permissions # makes a bunch of json permission files for test organisations
5553
make build # will take all permissions & create nrlf_permissions.zip file
5654

5755
# apply this new permissions zip file to your environment
5856
cd ./terraform/infrastructure
59-
assume nhsd-nrlf-test
57+
assume nhsd-nrlf-mgmt
6058
make init TF_WORKSPACE_NAME=perftest-1 ENV=perftest
6159
make ENV=perftest USE_SHARED_RESOURCES=true apply
6260
```
6361

64-
#### Generate input files
62+
### Prepare to run tests
63+
64+
Prepare the input files:
6565

6666
```sh
6767
assume nhsd-nrlf-test
68-
# creates 2 csv files and a json file
69-
make perftest-prepare PERFTEST_TABLE_NAME=perftest-baseline
68+
# PERFTEST_TABLE_NAME = pointer table currently pointed to by perftest env
69+
make perftest-prepare PERFTEST_TABLE_NAME=nhsd-nrlf--perftest-baseline-pointers-table ENV=perftest
7070
```
7171

7272
### Run tests
@@ -76,6 +76,28 @@ make perftest-consumer ENV_TYPE=perftest PERFTEST_HOST=perftest-1.perftest.recor
7676
make perftest-producer ENV_TYPE=perftest PERFTEST_HOST=perftest-1.perftest.record-locator.national.nhs.uk
7777
```
7878

79+
## Seed data
80+
81+
Must be run on an empty table. Cannot top up an existing set of pointers.
82+
83+
```sh
84+
make perftest-seed-tables ENV=perftest \
85+
PERFTEST_TABLE_NAME=nhsd-nrlf--perftest-anjali-test-2-pointers-table \
86+
PERFTEST_PATIENTS_WITH_POINTERS=10 \
87+
PERFTEST_POINTERS_PER_PATIENT=2
88+
```
89+
90+
### Refresh input files in S3
91+
92+
Regenerates the input files from the current state of a given perftest table and uploads the files to S3. These files are usually generated at the end of the seed tables make command (above).
93+
94+
> Note: this can be an expensive operation for large table sizes.
95+
96+
```sh
97+
make perftest-generate-pointer-table-extract \
98+
PERFTEST_TABLE_NAME=nhsd-nrlf--perftest-anjali-test-2-pointers-table
99+
```
100+
79101
## Assumptions / Caveats
80102

81103
- Run performance tests in the perftest environment only\*

tests/performance/consumer/client_perftest.js

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@ import { check } from "k6";
33
import exec from "k6/execution";
44
import { CATEGORY_TYPE_GROUPS } from "../type-category-mappings.js";
55

6-
const csvPath = __ENV.DIST_PATH
7-
? `../../../${__ENV.DIST_PATH}/producer_reference_data.csv`
8-
: "../producer_reference_data.csv";
6+
const distPath = __ENV.DIST_PATH || "./dist";
7+
const csvPath = `../../../${distPath}/nft/seed-pointers-extract.csv`;
98
const csv = open(csvPath);
109
const lines = csv.trim().split("\n");
1110
// Skip header
@@ -16,7 +15,7 @@ function getNextPointer() {
1615
const iter = exec.vu.iterationInScenario;
1716
const index = iter % dataLines.length;
1817
const line = dataLines[index];
19-
const [count, pointer_id, pointer_type, custodian, nhs_number] = line
18+
const [pointer_id, pointer_type, custodian, nhs_number] = line
2019
.split(",")
2120
.map((field) => field.trim());
2221
return { pointer_id, pointer_type, nhs_number };

tests/performance/consumer/consumer_reference_data.json

Lines changed: 0 additions & 9 deletions
This file was deleted.

tests/performance/producer/generate_distributions.py renamed to tests/performance/generate_producer_distributions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def expand_distribution(dist):
2424

2525
output = {"types": expanded_types, "custodians": expanded_custodians}
2626

27-
out_path = Path("./tests/performance/expanded_pointer_distributions.json")
27+
out_path = Path("./tests/performance/producer/expanded_pointer_distributions.json")
2828
out_path.parent.mkdir(parents=True, exist_ok=True)
2929
with out_path.open("w") as f:
3030
json.dump(output, f, indent=2)

0 commit comments

Comments
 (0)