From cc7bc76ed5602f528c82feedbf11eba22aa25dcb Mon Sep 17 00:00:00 2001 From: humulla <43747248+humulla@users.noreply.github.com> Date: Tue, 25 Mar 2025 08:13:54 +0000 Subject: [PATCH 1/3] WIP --- .gitignore | 3 +- .../asset_table/get_assets_by_prop_ref.py | 2 +- .../asset_table/update_property_patch.py | 94 +++++++++++++++++++ .../opensearch/housing_search/dummy-data.py | 31 ++++++ aws/database/rds/repairs/BonusCalc/Makefile | 57 +++++++++++ requirements.txt | 2 +- 6 files changed, 186 insertions(+), 3 deletions(-) create mode 100644 aws/database/dynamodb/scripts/asset_table/update_property_patch.py create mode 100644 aws/database/opensearch/housing_search/dummy-data.py create mode 100644 aws/database/rds/repairs/BonusCalc/Makefile diff --git a/.gitignore b/.gitignore index dc9f375..b4b05c9 100755 --- a/.gitignore +++ b/.gitignore @@ -21,4 +21,5 @@ venv config.py -stored_procedures \ No newline at end of file +stored_procedures +.vs/ diff --git a/aws/database/dynamodb/scripts/asset_table/get_assets_by_prop_ref.py b/aws/database/dynamodb/scripts/asset_table/get_assets_by_prop_ref.py index 739f60a..93c6631 100644 --- a/aws/database/dynamodb/scripts/asset_table/get_assets_by_prop_ref.py +++ b/aws/database/dynamodb/scripts/asset_table/get_assets_by_prop_ref.py @@ -69,7 +69,7 @@ def set_id_in_csv_with_asset_id(asset_table: Table, assets_from_csv: list[dict]) def main(): table = get_dynamodb_table(Config.TABLE_NAME, Config.STAGE) - _file_path = "aws\src\database\data\input\Property_extract_from_Nick_May_2023 - Copy of property_active_edit_09_06 (1).csv" + _file_path = r"aws\src\database\data\input\update_property_patch_march_2025.csv" asset_csv_data = csv_to_dict_list(_file_path) # Note: Writing to TSV which can be imported into Google Sheets diff --git a/aws/database/dynamodb/scripts/asset_table/update_property_patch.py b/aws/database/dynamodb/scripts/asset_table/update_property_patch.py new file mode 100644 index 0000000..6446449 --- /dev/null +++ b/aws/database/dynamodb/scripts/asset_table/update_property_patch.py @@ -0,0 +1,94 @@ +# 1. Get asset record by asset id (prop ref) - api endpoint +# 2. Get patch record by patch name on csv - api endpoint +# 3. update patch id and area id in asset record to match patch on csv +# 4. loop for all records in csv +from dataclasses import dataclass + +from mypy_boto3_dynamodb.service_resource import Table + +from aws.database.dynamodb.utils.get_by_secondary_index import get_by_secondary_index +from aws.database.dynamodb.utils.get_dynamodb_table import get_dynamodb_table +from aws.utils.csv_to_dict_list import csv_to_dict_list +from get_assets_by_prop_ref import main +from aws.utils.logger import Logger +from enums.enums import Stage +from aws.utils.progress_bar import ProgressBar +from aws.database.domain.dynamo_domain_objects import Patch +from aws.database.domain.dynamo_domain_objects import Asset + + + +@dataclass +class Config: + TABLE_NAME = "PatchesAndAreas" + OUTPUT_CLASS = Patch + LOGGER = Logger() + STAGE = Stage.HOUSING_STAGING + ITEM_COUNT_LIMIT = 10 # Set to None to return all items + +class AssetConfig: + TABLE_NAME = "Assets" + OUTPUT_CLASS = Asset + LOGGER = Logger() + STAGE = Stage.HOUSING_STAGING + ITEM_COUNT_LIMIT = 10 # Set to None to return all items + +def get_patch_record_by_patch_name(patchesAndAreas_table: Table, patch_from_csv: list[dict]) -> list[dict]: + """ + Iterate through the asset and set the id for each record + :return: ****TO UPDATE*** A dictionary containing the updated assets and the assets that failed to update + """ + progress_bar = ProgressBar(len(patch_from_csv)) + + for i, patch_item in enumerate(patch_from_csv): + if isinstance(patch_item["patch_pk"], str) and len(patch_item["patch_pk"]) > 0: + continue + else: + if i % 10 == 0: + progress_bar.display(i) + patch_name = patch_item["name"] + if patch_name is None: + patch_name["failed_reason"] = f"Invalid patchName: {patch_item['name']}. " + continue + results = get_by_secondary_index(patchesAndAreas_table, "patchName", "name", patch_name) + if len(results) > 1: + patch_item["failed_reason"] = f"Multiple patches found for patchName {patch_item['name']}. " + continue + elif len(results) == 0: + patch_item["failed_reason"] = f"No patch found for patchName {patch_item['name']}. " + continue + # update patch id and area id in asset record to match patch on csv + patch = results[0] + patch_id = patch.get("id") + area_id = patch.get("parentId") + prop_ref = patch_item["prop_ref"] + table = get_dynamodb_table(AssetConfig.TABLE_NAME, AssetConfig.STAGE) + update_areaid_patchid_in_asset(patch_id, area_id, prop_ref, table, AssetConfig.LOGGER) + patch_from_csv[i]["patch_pk"] = patch.get("id") + + return patch_from_csv + +def update_areaid_patchid_in_asset(patch_id: str | None, area_id: str | None, prop_ref :str, assets_table: Table, logger: Logger): + if len(prop_ref) is None: + logger.log(f'Could not find asset with prop ref {prop_ref}') + asset = get_by_secondary_index(assets_table, "AssetId", "assetId", prop_ref) + if (len(asset) > 0): + asset_record = asset[0] + asset_record["patchId"] = patch_id if patch_id else None + asset_record["areaId"] = area_id if area_id else None + logger.log(f'updating areaid to {asset_record["areaId"]} and patchId to {asset_record["patchId"]} for prop_ref {prop_ref}') + assets_table.put_item(Item=asset_record) + + + +def update_property_patch(): + # 1. Get asset record by asset id (prop ref) + assets_by_prop_ref = main() + # 2. Get patch record by patch name on csv - api endpoint + table = get_dynamodb_table(Config.TABLE_NAME, Config.STAGE) + _file_path = r"aws\src\database\data\input\update_property_patch_march_2025.csv" + patch_csv_data = csv_to_dict_list(_file_path) + get_by_patch_name = get_patch_record_by_patch_name(table, patch_csv_data) + +if __name__ == "__main__": + update_property_patch() \ No newline at end of file diff --git a/aws/database/opensearch/housing_search/dummy-data.py b/aws/database/opensearch/housing_search/dummy-data.py new file mode 100644 index 0000000..ab0ea47 --- /dev/null +++ b/aws/database/opensearch/housing_search/dummy-data.py @@ -0,0 +1,31 @@ +import json +from elasticsearch import Elasticsearch + + +DUMB_ITEMS = [] +with open("assets.json", "r") as f: + DUMB_ITEMS = json.load(f) + + +def elastic_search(): + index = "assets" + # create an elasticsearch client + es = Elasticsearch([{"host": "localhost", "port": 9200}]) + + # check connection + if not es.ping(): + raise ValueError("Connection failed") + + # create index if it does not exist + if not es.indices.exists(index): + es.indices.create(index=index) + + # clear all data in the index + es.delete_by_query(index=index, body={"query": {"match_all": {}}}) + + for item in DUMB_ITEMS: + es.index(index=index, id=item["id"], body=item) + + +if __name__ == "__main__": + elastic_search() \ No newline at end of file diff --git a/aws/database/rds/repairs/BonusCalc/Makefile b/aws/database/rds/repairs/BonusCalc/Makefile new file mode 100644 index 0000000..0eceb15 --- /dev/null +++ b/aws/database/rds/repairs/BonusCalc/Makefile @@ -0,0 +1,57 @@ +.ONESHELL: +# Requires AWS CLI Profile matching housing-${ENVIRONMENT} to be set up +# Requires AWS Session Manager Plugin to be installed: +# https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager-working-with-install-plugin.html +# On Windows you will need to run these commands using Git Bash, NOT Powershell / CMD + + +# -- Configuration -- +# Set the local port to use for the port forwarding - connect to this port on your local machine to connect to the RDS +LOCAL_PORT = 6005 +# For Parameter store URL Paths +STAGE := staging +# Set to AWSCLI Profile names +PROFILE := "housing-${STAGE}" + +# -- Parameter Store paths -- +JUMP_BOX_INSTANCE_NAME_PATH:="platform-apis-jump-box-instance-name" +POSTGRES_HOST_PATH:=" /bonuscalc-api/${STAGE}/postgres-hostname" +POSTGRES_PORT_PATH:=" /bonuscalc-api/${STAGE}/postgres-port" +POSTGRES_USERNAME_PATH:=" /bonuscalc-api/${STAGE}/postgres-username" +POSTGRES_PASSWORD_PATH:=" /bonuscalc-api/${STAGE}/postgres-password" + +# -- Parameters -- +# Get parameters from parameter store for the profile used +INSTANCE_ID := $(shell aws ssm get-parameter --name ${JUMP_BOX_INSTANCE_NAME_PATH} --region "eu-west-2" --profile ${PROFILE} --query Parameter.Value --output text) +REMOTE_HOST := $(shell aws ssm get-parameter --name ${POSTGRES_HOST_PATH} --region "eu-west-2" --profile ${PROFILE} --query Parameter.Value --output text) +REMOTE_PORT := $(shell aws ssm get-parameter --name ${POSTGRES_PORT_PATH} --region "eu-west-2" --profile ${PROFILE} --query Parameter.Value --output text) +REMOTE_USERNAME := $(shell aws ssm get-parameter --name ${POSTGRES_USERNAME_PATH} --region "eu-west-2" --profile ${PROFILE} --query Parameter.Value --output text) +REMOTE_PASSWORD := $(shell aws ssm get-parameter --with-decryption --name ${POSTGRES_PASSWORD_PATH} --region "eu-west-2" --profile ${PROFILE} --query Parameter.Value --output text) + +DATABASE_PARAMS = '{"host":["${REMOTE_HOST}"], "portNumber":["${REMOTE_PORT}"], "localPortNumber":["${LOCAL_PORT}"]}' + +# -- Commands -- + +# Use this command to login to the AWS SSO service +# This is required to use the ssm commands +sso_login: + if (aws sts get-caller-identity --profile ${PROFILE}) + then + echo "Session still valid" + else + echo "Session expired, logging in" + aws sso login --profile ${PROFILE} + fi + +# Use this command to connect to create a port forwarding session from localhost to the RDS instance via the jump-box +# This will allow connecting to the database using a GUI tool like pgAdmin, or with local scripts +port_forwarding_to_bonus_calc: + echo USERNAME: ${REMOTE_USERNAME} + echo PASSWORD: ${REMOTE_PASSWORD} + + aws ssm start-session \ + --target ${INSTANCE_ID} \ + --region eu-west-2 \ + --profile ${PROFILE} \ + --document-name AWS-StartPortForwardingSessionToRemoteHost \ + --parameters ${DATABASE_PARAMS}; diff --git a/requirements.txt b/requirements.txt index 1aa5641..0715877 100755 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ google>=3.0.0 google-api-python-client>=2.86.0 pyperclip~=1.8.2 python-dateutil~=2.8.2 -pyodbc~=5.2.0 +# pyodbc~=5.2.0 requests~=2.31.0 Faker~=22.0.0 elasticsearch~=7.10.1 From f3febb37e0bfc9bf3f56df4c408c29a38237590c Mon Sep 17 00:00:00 2001 From: Humairaa Mulla Date: Tue, 25 Mar 2025 14:06:09 +0000 Subject: [PATCH 2/3] update requirement pckg --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0715877..a06f1d0 100755 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,4 @@ Faker~=22.0.0 elasticsearch~=7.10.1 pre-commit>=3.5.0 psycopg2-binary>=2.9.9 -numpy<2 +numpy>2 From fc24daa2218a69137685da6739b2a08168d7199f Mon Sep 17 00:00:00 2001 From: humulla <43747248+humulla@users.noreply.github.com> Date: Thu, 27 Mar 2025 11:07:08 +0000 Subject: [PATCH 3/3] update property patch script working and tested in dev --- .../asset_table/get_assets_by_prop_ref.py | 11 +- .../asset_table/update_property_patch.py | 197 +++++++++--------- 2 files changed, 110 insertions(+), 98 deletions(-) diff --git a/aws/database/dynamodb/scripts/asset_table/get_assets_by_prop_ref.py b/aws/database/dynamodb/scripts/asset_table/get_assets_by_prop_ref.py index 93c6631..784e15e 100644 --- a/aws/database/dynamodb/scripts/asset_table/get_assets_by_prop_ref.py +++ b/aws/database/dynamodb/scripts/asset_table/get_assets_by_prop_ref.py @@ -54,9 +54,11 @@ def set_id_in_csv_with_asset_id(asset_table: Table, assets_from_csv: list[dict]) if asset_id is None: asset_item["failed_reason"] = f"Invalid assetId: {asset_item['prop_ref']}. " continue - results = get_by_secondary_index(asset_table, "AssetId", "assetId", asset_id) + results = get_by_secondary_index( + asset_table, "AssetId", "assetId", asset_id) if len(results) > 1: - asset_item["failed_reason"] = f"Multiple assets found for assetId {asset_item['prop_ref']}. " + asset_item[ + "failed_reason"] = f"Multiple assets found for assetId {asset_item['prop_ref']}. " continue elif len(results) == 0: asset_item["failed_reason"] = f"No assets found for assetId {asset_item['prop_ref']}. " @@ -69,7 +71,7 @@ def set_id_in_csv_with_asset_id(asset_table: Table, assets_from_csv: list[dict]) def main(): table = get_dynamodb_table(Config.TABLE_NAME, Config.STAGE) - _file_path = r"aws\src\database\data\input\update_property_patch_march_2025.csv" + _file_path = "aws\src\database\data\input\Property_extract_from_Nick_May_2023 - Copy of property_active_edit_09_06 (1).csv" asset_csv_data = csv_to_dict_list(_file_path) # Note: Writing to TSV which can be imported into Google Sheets @@ -79,4 +81,5 @@ def main(): headings = asset_with_ids[0].keys() f.write("\t".join(headings) + "\n") for asset in asset_with_ids: - f.write("\t".join([str(asset[heading]) for heading in headings]) + "\n") + f.write("\t".join([str(asset[heading]) + for heading in headings]) + "\n") diff --git a/aws/database/dynamodb/scripts/asset_table/update_property_patch.py b/aws/database/dynamodb/scripts/asset_table/update_property_patch.py index 6446449..36fe05a 100644 --- a/aws/database/dynamodb/scripts/asset_table/update_property_patch.py +++ b/aws/database/dynamodb/scripts/asset_table/update_property_patch.py @@ -1,94 +1,103 @@ -# 1. Get asset record by asset id (prop ref) - api endpoint -# 2. Get patch record by patch name on csv - api endpoint -# 3. update patch id and area id in asset record to match patch on csv -# 4. loop for all records in csv -from dataclasses import dataclass - -from mypy_boto3_dynamodb.service_resource import Table - -from aws.database.dynamodb.utils.get_by_secondary_index import get_by_secondary_index -from aws.database.dynamodb.utils.get_dynamodb_table import get_dynamodb_table -from aws.utils.csv_to_dict_list import csv_to_dict_list -from get_assets_by_prop_ref import main -from aws.utils.logger import Logger -from enums.enums import Stage -from aws.utils.progress_bar import ProgressBar -from aws.database.domain.dynamo_domain_objects import Patch -from aws.database.domain.dynamo_domain_objects import Asset - - - -@dataclass -class Config: - TABLE_NAME = "PatchesAndAreas" - OUTPUT_CLASS = Patch - LOGGER = Logger() - STAGE = Stage.HOUSING_STAGING - ITEM_COUNT_LIMIT = 10 # Set to None to return all items - -class AssetConfig: - TABLE_NAME = "Assets" - OUTPUT_CLASS = Asset - LOGGER = Logger() - STAGE = Stage.HOUSING_STAGING - ITEM_COUNT_LIMIT = 10 # Set to None to return all items - -def get_patch_record_by_patch_name(patchesAndAreas_table: Table, patch_from_csv: list[dict]) -> list[dict]: - """ - Iterate through the asset and set the id for each record - :return: ****TO UPDATE*** A dictionary containing the updated assets and the assets that failed to update - """ - progress_bar = ProgressBar(len(patch_from_csv)) - - for i, patch_item in enumerate(patch_from_csv): - if isinstance(patch_item["patch_pk"], str) and len(patch_item["patch_pk"]) > 0: - continue - else: - if i % 10 == 0: - progress_bar.display(i) - patch_name = patch_item["name"] - if patch_name is None: - patch_name["failed_reason"] = f"Invalid patchName: {patch_item['name']}. " - continue - results = get_by_secondary_index(patchesAndAreas_table, "patchName", "name", patch_name) - if len(results) > 1: - patch_item["failed_reason"] = f"Multiple patches found for patchName {patch_item['name']}. " - continue - elif len(results) == 0: - patch_item["failed_reason"] = f"No patch found for patchName {patch_item['name']}. " - continue - # update patch id and area id in asset record to match patch on csv - patch = results[0] - patch_id = patch.get("id") - area_id = patch.get("parentId") - prop_ref = patch_item["prop_ref"] - table = get_dynamodb_table(AssetConfig.TABLE_NAME, AssetConfig.STAGE) - update_areaid_patchid_in_asset(patch_id, area_id, prop_ref, table, AssetConfig.LOGGER) - patch_from_csv[i]["patch_pk"] = patch.get("id") - - return patch_from_csv - -def update_areaid_patchid_in_asset(patch_id: str | None, area_id: str | None, prop_ref :str, assets_table: Table, logger: Logger): - if len(prop_ref) is None: - logger.log(f'Could not find asset with prop ref {prop_ref}') - asset = get_by_secondary_index(assets_table, "AssetId", "assetId", prop_ref) - if (len(asset) > 0): - asset_record = asset[0] - asset_record["patchId"] = patch_id if patch_id else None - asset_record["areaId"] = area_id if area_id else None - logger.log(f'updating areaid to {asset_record["areaId"]} and patchId to {asset_record["patchId"]} for prop_ref {prop_ref}') - assets_table.put_item(Item=asset_record) - - - -def update_property_patch(): - # 1. Get asset record by asset id (prop ref) - assets_by_prop_ref = main() - # 2. Get patch record by patch name on csv - api endpoint - table = get_dynamodb_table(Config.TABLE_NAME, Config.STAGE) - _file_path = r"aws\src\database\data\input\update_property_patch_march_2025.csv" - patch_csv_data = csv_to_dict_list(_file_path) - get_by_patch_name = get_patch_record_by_patch_name(table, patch_csv_data) - -if __name__ == "__main__": - update_property_patch() \ No newline at end of file +from dataclasses import dataclass + +from mypy_boto3_dynamodb.service_resource import Table + +from aws.database.dynamodb.utils.get_by_secondary_index import get_by_secondary_index +from aws.database.dynamodb.utils.get_dynamodb_table import get_dynamodb_table +from aws.utils.csv_to_dict_list import csv_to_dict_list +from aws.utils.logger import Logger +from enums.enums import Stage +from aws.utils.progress_bar import ProgressBar +from aws.database.domain.dynamo_domain_objects import Asset + +import requests +import os +import re +from dotenv import load_dotenv +from utils.confirm import confirm + + +load_dotenv() + + +@dataclass +class Config: + TABLE_NAME = "Assets" + OUTPUT_CLASS = Asset + LOGGER = Logger() + STAGE = Stage.HOUSING_STAGING + ITEM_COUNT_LIMIT = 10 # Set to None to return all items + + +def clean_asset_id(asset_id: str) -> str | None: + asset_id = str(asset_id) + asset_id = asset_id.replace(" ", "") + if len(asset_id) < 8: + # pad with zeros + asset_id = asset_id.zfill(8) + if asset_id.isnumeric(): + if len(asset_id) > 8: + return None + propref_regex = re.compile(r"^([0-9]{8})$") + asset_id = propref_regex.findall(asset_id)[0] + return asset_id + return None + + +def update_areaid_patchid_in_asset(assets_table: Table, patch_from_csv: list[dict], logger: Logger): + # get all patches and areas endpoint + url = os.getenv('GETALLPATCHESANDAREAAPIURL') + token = os.getenv('AUTH_TOKEN') + if None not in (url, token): + response = requests.get(url, headers={'Authorization': token}) + allPatchesAndAreas = response.json() + + progress_bar = ProgressBar(len(patch_from_csv)) + # get asset record based on prop ref from csv + for i, item in enumerate(patch_from_csv): + if i % 10 == 0: + progress_bar.display(i) + asset_id = item["prop_ref"] + asset_id = clean_asset_id(asset_id) + if asset_id is None: + logger.log(f"Invalid assetId: {item['prop_ref']}. ") + continue + results = get_by_secondary_index( + assets_table, "AssetId", "assetId", asset_id) + if len(results) > 1: + logger.log( + f"Multiple assets found for assetId {item['prop_ref']}. ") + continue + elif len(results) == 0: + logger.log(f"No assets found for assetId {item['prop_ref']}. ") + continue + asset = results[0] + patchName = item["name"] + if not patchName: + logger.log(f'patchName is not given for propertyRef {asset_id}') + continue + # //get patch object based on patch name given in csv + for patch in allPatchesAndAreas: + if patch['name'] == patchName: + patchId = patch['id'] + areaId = patch['parentId'] + asset["patchId"] = patchId if patchId else None + asset["areaId"] = areaId if areaId else None + logger.log( + f'updating areaid to {asset["areaId"]} and patchId to {asset["patchId"]} for prop_ref {asset_id}') + assets_table.put_item(Item=asset) + logger.log(f"UPDATED {asset['id']}") + + +def update_property_patch(): + table = get_dynamodb_table(Config.TABLE_NAME, Config.STAGE) + _file_path = r"/workspaces/mtfh-scripts/aws/Propert Patch List for script STAGING.csv" + patch_csv_data = csv_to_dict_list(_file_path) + + if confirm(f"Are you sure you want to update property patch in {Config.STAGE.to_env_name()}?"): + update_areaid_patchid_in_asset( + table, patch_csv_data, Config.LOGGER) + + +if __name__ == "__main__": + update_property_patch()