From a6712f44f054b98bce134b2bf3a1bd6ba4b7121a Mon Sep 17 00:00:00 2001 From: tejasgn <“tejasgn@amazon.com”> Date: Fri, 5 Dec 2025 15:47:23 -0800 Subject: [PATCH 01/11] adding sync logic for steering and specifications --- .github/workflows/sync-steer-specs.yml | 66 ++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 .github/workflows/sync-steer-specs.yml diff --git a/.github/workflows/sync-steer-specs.yml b/.github/workflows/sync-steer-specs.yml new file mode 100644 index 00000000000..6b8de0d1635 --- /dev/null +++ b/.github/workflows/sync-steer-specs.yml @@ -0,0 +1,66 @@ +name: syncSteerSpecs +on: + push: + branches: ["main"] + workflow_dispatch: + inputs: + dir_name: + description: 'Sync Directories' + required: true + default: 'steering_docs' + type: choice + options: + - steering_docs + - scenarios + + +permissions: + id-token: write + +jobs: + run_job_with_aws: + runs-on: ubuntu-latest + env: + sdk_name: ${{ github.event.inputs.sdk_name || 'steering_docs' }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v5 + with: + role-to-assume: ${{ secrets.AWS_ASSUME_ROLE }} + aws-region: us-west-2 + + - name: Set SDK and language mapping for S3 + run: | + if [ "$dir_name" == "steering_docs" ]; then + echo "S3_NAME=steering-docs" >> $GITHUB_ENV + elif [ "$dir_name" == "scenarios" ]; then + echo "S3_NAME=final-specs" >> $GITHUB_ENV + fi + + - name: Filter SPECIFICATION.md files for scenarios + if: ${{ github.event.inputs.dir_name == 'scenarios' }} + run: | + find ./scenarios -name "SPECIFICATION.md" -exec cp --parents {} ./filtered_scenarios/ \; + + - name: Upload/Sync to S3 + run: | + if [ "$dir_name" == "scenarios" ]; then + aws s3 sync "./filtered_scenarios/scenarios/" "s3://$S3_NAME-bucket/" --delete + else + aws s3 sync "./$dir_name/" "s3://$S3_NAME-bucket/" --delete + fi + + - name: Sync Knowledge Base Data Source + run: | + aws lambda invoke \ + 
--function-name KB_Updater \ + --payload "{\"language\":\"$S3_NAME\",\"region\":\"us-west-2\"}" \ + --cli-binary-format raw-in-base64-out \ + response.json + + echo "Knowledge Base sync initiated" + cat response.json \ No newline at end of file From 533ef8fc27e4628359c44fa38afb149821ddca50 Mon Sep 17 00:00:00 2001 From: tejasgn <“tejasgn@amazon.com”> Date: Fri, 5 Dec 2025 15:48:59 -0800 Subject: [PATCH 02/11] check in testing branch --- .github/workflows/sync-steer-specs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sync-steer-specs.yml b/.github/workflows/sync-steer-specs.yml index 6b8de0d1635..804c433b6e4 100644 --- a/.github/workflows/sync-steer-specs.yml +++ b/.github/workflows/sync-steer-specs.yml @@ -1,7 +1,7 @@ name: syncSteerSpecs on: push: - branches: ["main"] + branches: ["sync-for-steer-specs"] workflow_dispatch: inputs: dir_name: @@ -53,7 +53,7 @@ jobs: else aws s3 sync "./$dir_name/" "s3://$S3_NAME-bucket/" --delete fi - + - name: Sync Knowledge Base Data Source run: | aws lambda invoke \ From 4c1b733645a7670a2823b4504ecea8925417ab45 Mon Sep 17 00:00:00 2001 From: tejasgn <“tejasgn@amazon.com”> Date: Fri, 5 Dec 2025 16:12:18 -0800 Subject: [PATCH 03/11] updating logic --- .github/workflows/sync-steer-specs.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/sync-steer-specs.yml b/.github/workflows/sync-steer-specs.yml index 804c433b6e4..f78e59c7488 100644 --- a/.github/workflows/sync-steer-specs.yml +++ b/.github/workflows/sync-steer-specs.yml @@ -44,14 +44,17 @@ jobs: - name: Filter SPECIFICATION.md files for scenarios if: ${{ github.event.inputs.dir_name == 'scenarios' }} run: | - find ./scenarios -name "SPECIFICATION.md" -exec cp --parents {} ./filtered_scenarios/ \; + find ./scenarios -name "SPECIFICATION.md" | while read file; do + mkdir -p "./filtered_scenarios/$(dirname "$file")" + cp "$file" "./filtered_scenarios/$file" + done - name: Upload/Sync to S3 
run: | - if [ "$dir_name" == "scenarios" ]; then - aws s3 sync "./filtered_scenarios/scenarios/" "s3://$S3_NAME-bucket/" --delete - else + if [ "$dir_name" == "steering_docs" ]; then aws s3 sync "./$dir_name/" "s3://$S3_NAME-bucket/" --delete + else + aws s3 sync "./filtered_scenarios/scenarios/" "s3://$S3_NAME-bucket/" --delete fi - name: Sync Knowledge Base Data Source From 76c293af63ac993340ed444b8f4ad6107838cd24 Mon Sep 17 00:00:00 2001 From: tejasgn <“tejasgn@amazon.com”> Date: Fri, 5 Dec 2025 16:16:58 -0800 Subject: [PATCH 04/11] correcting variable name --- .github/workflows/sync-steer-specs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sync-steer-specs.yml b/.github/workflows/sync-steer-specs.yml index f78e59c7488..694c13d7544 100644 --- a/.github/workflows/sync-steer-specs.yml +++ b/.github/workflows/sync-steer-specs.yml @@ -21,7 +21,7 @@ jobs: run_job_with_aws: runs-on: ubuntu-latest env: - sdk_name: ${{ github.event.inputs.sdk_name || 'steering_docs' }} + dir_name: ${{ github.event.inputs.dir_name || 'steering_docs' }} steps: - name: Checkout @@ -54,7 +54,7 @@ jobs: if [ "$dir_name" == "steering_docs" ]; then aws s3 sync "./$dir_name/" "s3://$S3_NAME-bucket/" --delete else - aws s3 sync "./filtered_scenarios/scenarios/" "s3://$S3_NAME-bucket/" --delete + aws s3 sync "./filtered_scenarios/" "s3://$S3_NAME-bucket/" --delete fi - name: Sync Knowledge Base Data Source From 8409d54e1d239a096179655191eee75cd4379344 Mon Sep 17 00:00:00 2001 From: tejasgn <“tejasgn@amazon.com”> Date: Mon, 15 Dec 2025 16:13:19 -0800 Subject: [PATCH 05/11] check for spec filetering,pushing,syncing --- .github/workflows/sync-steer-specs.yml | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/.github/workflows/sync-steer-specs.yml b/.github/workflows/sync-steer-specs.yml index 694c13d7544..daf1dd9c1f6 100644 --- a/.github/workflows/sync-steer-specs.yml +++ b/.github/workflows/sync-steer-specs.yml @@ 
-7,12 +7,12 @@ on: dir_name: description: 'Sync Directories' required: true - default: 'steering_docs' + default: 'scenarios' type: choice options: - steering_docs - scenarios - + - coding-standards permissions: id-token: write @@ -33,26 +33,39 @@ jobs: role-to-assume: ${{ secrets.AWS_ASSUME_ROLE }} aws-region: us-west-2 - - name: Set SDK and language mapping for S3 + - name: Configure directory mapping for S3 run: | if [ "$dir_name" == "steering_docs" ]; then echo "S3_NAME=steering-docs" >> $GITHUB_ENV + elif [ "$dir_name" == "coding-standards" ]; then + echo "S3_NAME=coding-standards" >> $GITHUB_ENV elif [ "$dir_name" == "scenarios" ]; then echo "S3_NAME=final-specs" >> $GITHUB_ENV fi - - name: Filter SPECIFICATION.md files for scenarios + - name: Filter SPECIFICATION.md files for specs if: ${{ github.event.inputs.dir_name == 'scenarios' }} run: | find ./scenarios -name "SPECIFICATION.md" | while read file; do mkdir -p "./filtered_scenarios/$(dirname "$file")" cp "$file" "./filtered_scenarios/$file" done + + - name: Clone and filter for coding standards + if: ${{ github.event.inputs.dir_name == 'coding-standards' }} + run: | + git clone https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.wiki.git wiki-repo + mkdir -p ./filtered-wiki + find ./wiki-repo -type f -name "*[Gg]uidelines*.md" -o -name "*[Ss]tandards*.md" | while read file; do + cp "$file" ./filtered-wiki/ + done - name: Upload/Sync to S3 run: | if [ "$dir_name" == "steering_docs" ]; then aws s3 sync "./$dir_name/" "s3://$S3_NAME-bucket/" --delete + elif [ "$dir_name" == "coding-standards" ]; then + aws s3 sync "./filtered-wiki/" "s3://$S3_NAME-bucket/" --delete else aws s3 sync "./filtered_scenarios/" "s3://$S3_NAME-bucket/" --delete fi @@ -66,4 +79,4 @@ jobs: response.json echo "Knowledge Base sync initiated" - cat response.json \ No newline at end of file + cat response.json From 3d403cbbf6e9eba0a830b42d7ff6222d09bf2b09 Mon Sep 17 00:00:00 2001 From: tejasgn 
<“tejasgn@amazon.com”>
Date: Mon, 15 Dec 2025 16:28:11 -0800
Subject: [PATCH 06/11] move everything to one workflow

---
Review note (free text in this position is ignored by git am):
 * Step-level `if:` guards now test env.sdk_name instead of
   github.event.inputs.sdk_name. The inputs context is empty on push events,
   so the original guards skipped every filter/extract/upload step on push
   while the Knowledge Base sync step still ran. env.sdk_name is set at job
   level for both triggers.
 * The wiki `find` groups its -name tests so that -type f applies to both.
 * Hunk sizes are unchanged; blob ids on the index lines were not recomputed.

 .github/workflows/sync-S3-KB.yml       | 45 ++++++++++++++++++++++++--
 .github/workflows/sync-steer-specs.yml |  2 +-
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/sync-S3-KB.yml b/.github/workflows/sync-S3-KB.yml
index 1642517cf69..0fd4cb8bc88 100644
--- a/.github/workflows/sync-S3-KB.yml
+++ b/.github/workflows/sync-S3-KB.yml
@@ -1,7 +1,7 @@
 name: syncS3andKB
 on:
   push:
-    branches: ["main"]
+    branches: ["sync-for-steer-specs"]
   workflow_dispatch:
     inputs:
       sdk_name:
@@ -21,6 +21,9 @@ on:
           - php
           - cpp
           - kotlin
+          - steering_docs
+          - specs
+          - coding-standards
 
 permissions:
   id-token: write
@@ -53,11 +56,35 @@ jobs:
             echo "S3_LANGUAGE=rust" >> $GITHUB_ENV
           elif [ "$sdk_name" == "gov2" ]; then
             echo "S3_LANGUAGE=go" >> $GITHUB_ENV
+          elif [ "$sdk_name" == "steering_docs" ]; then
+            echo "S3_LANGUAGE=steering-docs" >> $GITHUB_ENV
+          elif [ "$sdk_name" == "coding-standards" ]; then
+            echo "S3_LANGUAGE=coding-standards" >> $GITHUB_ENV
+          elif [ "$sdk_name" == "specs" ]; then
+            echo "S3_LANGUAGE=final-specs" >> $GITHUB_ENV
           else
             echo "S3_LANGUAGE=$sdk_name" >> $GITHUB_ENV
           fi
-      
+
+      - name: Filter SPECIFICATION.md files for specs
+        if: ${{ env.sdk_name == 'specs' }}  # env, not github.event.inputs: inputs are empty on push
+        run: |
+          find ./scenarios -name "SPECIFICATION.md" | while read file; do
+            mkdir -p "./filtered_specs/$(dirname "$file")"
+            cp "$file" "./filtered_specs/$file"
+          done
+
+      - name: Clone and filter for coding standards
+        if: ${{ env.sdk_name == 'coding-standards' }}
+        run: |
+          git clone https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.wiki.git wiki-repo
+          mkdir -p ./filtered-wiki
+          find ./wiki-repo -type f \( -name "*[Gg]uidelines*.md" -o -name "*[Ss]tandards*.md" \) | while read file; do
+            cp "$file" ./filtered-wiki/
+          done
+
       - name: Extract and copy premium examples in temp. dir.
+        if: ${{ contains(fromJSON('["javascriptv3","dotnetv4","javav2","rustv1","gov2","swift","python","ruby","php","cpp","kotlin"]'), env.sdk_name) }}
         run: |
           MARKDOWN_FILE="./$sdk_name/premium-ex.md"
 
@@ -99,7 +126,8 @@ jobs:
             fi
           done
 
-      - name: Upload/Sync to S3
+      - name: Upload/Sync to S3 (SDK languages)
+        if: ${{ contains(fromJSON('["javascriptv3","dotnetv4","javav2","rustv1","gov2","swift","python","ruby","php","cpp","kotlin"]'), env.sdk_name) }}
         run: |
           for level in "basics" "feature-scenario" "complex-feature-scenario"; do
             if [ -d "./extracted_snippets/$level" ]; then
@@ -107,6 +135,17 @@ jobs:
               echo "Uploaded $level examples to S3"
             fi
           done
+
+      - name: Upload/Sync to S3 (Other directories)
+        if: ${{ contains(fromJSON('["steering_docs","coding-standards","specs"]'), env.sdk_name) }}
+        run: |
+          if [ "$sdk_name" == "steering_docs" ]; then
+            aws s3 sync "./$sdk_name/" "s3://$S3_LANGUAGE-bucket/" --delete
+          elif [ "$sdk_name" == "coding-standards" ]; then
+            aws s3 sync "./filtered-wiki/" "s3://$S3_LANGUAGE-bucket/" --delete
+          else
+            aws s3 sync "./filtered_specs/" "s3://$S3_LANGUAGE-bucket/" --delete
+          fi
 
       - name: Sync Knowledge Base Data Source
         run: |
diff --git a/.github/workflows/sync-steer-specs.yml b/.github/workflows/sync-steer-specs.yml
index daf1dd9c1f6..957d4c39438 100644
--- a/.github/workflows/sync-steer-specs.yml
+++ b/.github/workflows/sync-steer-specs.yml
@@ -1,7 +1,7 @@
 name: syncSteerSpecs
 on:
   push:
-    branches: ["sync-for-steer-specs"]
+    branches: ["main"]
   workflow_dispatch:
     inputs:
       dir_name:

From b64e68076d45ea067e2fa3f92ef592b018c33d03 Mon Sep 17 00:00:00 2001
From: tejasgn <“tejasgn@amazon.com”>
Date: Mon, 15 Dec 2025 19:08:58 -0800
Subject: [PATCH 07/11] added matrix approach and removed extra sync workflow

---
 .github/workflows/sync-S3-KB.yml       |  5 +-
 .github/workflows/sync-steer-specs.yml | 82 --------------------------
 2 files changed, 4 insertions(+), 83 deletions(-)
 delete mode 100644
.github/workflows/sync-steer-specs.yml diff --git a/.github/workflows/sync-S3-KB.yml b/.github/workflows/sync-S3-KB.yml index 0fd4cb8bc88..f674de7a90a 100644 --- a/.github/workflows/sync-S3-KB.yml +++ b/.github/workflows/sync-S3-KB.yml @@ -31,8 +31,11 @@ permissions: jobs: run_job_with_aws: runs-on: ubuntu-latest + strategy: + matrix: + sdk_name: ${{ github.event_name == 'push' && fromJSON('["javascriptv3","dotnetv4","javav2","rustv1","gov2","swift","python","ruby","php","cpp","kotlin","steering_docs","specs","coding-standards"]') || fromJSON(format('["{0}"]', github.event.inputs.sdk_name)) }} env: - sdk_name: ${{ github.event.inputs.sdk_name || 'python' }} + sdk_name: ${{ matrix.sdk_name }} steps: - name: Checkout diff --git a/.github/workflows/sync-steer-specs.yml b/.github/workflows/sync-steer-specs.yml deleted file mode 100644 index 957d4c39438..00000000000 --- a/.github/workflows/sync-steer-specs.yml +++ /dev/null @@ -1,82 +0,0 @@ -name: syncSteerSpecs -on: - push: - branches: ["main"] - workflow_dispatch: - inputs: - dir_name: - description: 'Sync Directories' - required: true - default: 'scenarios' - type: choice - options: - - steering_docs - - scenarios - - coding-standards - -permissions: - id-token: write - -jobs: - run_job_with_aws: - runs-on: ubuntu-latest - env: - dir_name: ${{ github.event.inputs.dir_name || 'steering_docs' }} - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v5 - with: - role-to-assume: ${{ secrets.AWS_ASSUME_ROLE }} - aws-region: us-west-2 - - - name: Configure directory mapping for S3 - run: | - if [ "$dir_name" == "steering_docs" ]; then - echo "S3_NAME=steering-docs" >> $GITHUB_ENV - elif [ "$dir_name" == "coding-standards" ]; then - echo "S3_NAME=coding-standards" >> $GITHUB_ENV - elif [ "$dir_name" == "scenarios" ]; then - echo "S3_NAME=final-specs" >> $GITHUB_ENV - fi - - - name: Filter SPECIFICATION.md files for specs - if: ${{ 
github.event.inputs.dir_name == 'scenarios' }} - run: | - find ./scenarios -name "SPECIFICATION.md" | while read file; do - mkdir -p "./filtered_scenarios/$(dirname "$file")" - cp "$file" "./filtered_scenarios/$file" - done - - - name: Clone and filter for coding standards - if: ${{ github.event.inputs.dir_name == 'coding-standards' }} - run: | - git clone https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.wiki.git wiki-repo - mkdir -p ./filtered-wiki - find ./wiki-repo -type f -name "*[Gg]uidelines*.md" -o -name "*[Ss]tandards*.md" | while read file; do - cp "$file" ./filtered-wiki/ - done - - - name: Upload/Sync to S3 - run: | - if [ "$dir_name" == "steering_docs" ]; then - aws s3 sync "./$dir_name/" "s3://$S3_NAME-bucket/" --delete - elif [ "$dir_name" == "coding-standards" ]; then - aws s3 sync "./filtered-wiki/" "s3://$S3_NAME-bucket/" --delete - else - aws s3 sync "./filtered_scenarios/" "s3://$S3_NAME-bucket/" --delete - fi - - - name: Sync Knowledge Base Data Source - run: | - aws lambda invoke \ - --function-name KB_Updater \ - --payload "{\"language\":\"$S3_NAME\",\"region\":\"us-west-2\"}" \ - --cli-binary-format raw-in-base64-out \ - response.json - - echo "Knowledge Base sync initiated" - cat response.json From d6aeea69d885e5df178483cb8bd7f1b87b2a6ec9 Mon Sep 17 00:00:00 2001 From: tejasgn <“tejasgn@amazon.com”> Date: Mon, 15 Dec 2025 19:14:54 -0800 Subject: [PATCH 08/11] default to main branch --- .github/workflows/sync-S3-KB.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sync-S3-KB.yml b/.github/workflows/sync-S3-KB.yml index f674de7a90a..ab29796e678 100644 --- a/.github/workflows/sync-S3-KB.yml +++ b/.github/workflows/sync-S3-KB.yml @@ -1,7 +1,7 @@ name: syncS3andKB on: push: - branches: ["sync-for-steer-specs"] + branches: ["main"] workflow_dispatch: inputs: sdk_name: From fcffc0da7b62cef337525dd6103eeefa4548d9e1 Mon Sep 17 00:00:00 2001 From: tejasgn 
<“tejasgn@amazon.com”> Date: Tue, 16 Dec 2025 11:59:58 -0800 Subject: [PATCH 09/11] rename basics to basics_scenario --- steering_docs/dotnet-tech/{basics.md => basics_scenario.md} | 0 steering_docs/go-tech/{basics.md => basics_scenario.md} | 0 steering_docs/java-tech/{basics.md => basics_scenario.md} | 0 steering_docs/kotlin-tech/{basics.md => basics_scenario.md} | 0 steering_docs/php-tech/{basics.md => basics_scenario.md} | 0 steering_docs/python-tech/{basics.md => basics_scenario.md} | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename steering_docs/dotnet-tech/{basics.md => basics_scenario.md} (100%) rename steering_docs/go-tech/{basics.md => basics_scenario.md} (100%) rename steering_docs/java-tech/{basics.md => basics_scenario.md} (100%) rename steering_docs/kotlin-tech/{basics.md => basics_scenario.md} (100%) rename steering_docs/php-tech/{basics.md => basics_scenario.md} (100%) rename steering_docs/python-tech/{basics.md => basics_scenario.md} (100%) diff --git a/steering_docs/dotnet-tech/basics.md b/steering_docs/dotnet-tech/basics_scenario.md similarity index 100% rename from steering_docs/dotnet-tech/basics.md rename to steering_docs/dotnet-tech/basics_scenario.md diff --git a/steering_docs/go-tech/basics.md b/steering_docs/go-tech/basics_scenario.md similarity index 100% rename from steering_docs/go-tech/basics.md rename to steering_docs/go-tech/basics_scenario.md diff --git a/steering_docs/java-tech/basics.md b/steering_docs/java-tech/basics_scenario.md similarity index 100% rename from steering_docs/java-tech/basics.md rename to steering_docs/java-tech/basics_scenario.md diff --git a/steering_docs/kotlin-tech/basics.md b/steering_docs/kotlin-tech/basics_scenario.md similarity index 100% rename from steering_docs/kotlin-tech/basics.md rename to steering_docs/kotlin-tech/basics_scenario.md diff --git a/steering_docs/php-tech/basics.md b/steering_docs/php-tech/basics_scenario.md similarity index 100% rename from steering_docs/php-tech/basics.md 
rename to steering_docs/php-tech/basics_scenario.md
diff --git a/steering_docs/python-tech/basics.md b/steering_docs/python-tech/basics_scenario.md
similarity index 100%
rename from steering_docs/python-tech/basics.md
rename to steering_docs/python-tech/basics_scenario.md

From ca3be9ff50e33f2158a18e06f72d96a024545ea9 Mon Sep 17 00:00:00 2001
From: tejasgn <“tejasgn@amazon.com”>
Date: Tue, 16 Dec 2025 12:17:22 -0800
Subject: [PATCH 10/11] added KB_Updater and its workflow

---
Review note (free text in this position is ignored by git am):
 * KB_Updater.yml: grant `contents: read`; once a permissions block is
   declared, actions/checkout no longer gets it implicitly.
 * KB_Updater.py:
   - "coding-standards" is treated as a non-premium source everywhere, so its
     data source points at coding-standards-bucket (the bucket the sync
     workflow uploads to) instead of coding-standards-premium-bucket.
   - the handler accepts the "region" key that the workflow payload sends,
     in addition to "region_name".
   - a polling timeout is reported as TIMEOUT (it used to surface as UNKNOWN)
     and the message states the real budget (100 x 5 s, not 5 minutes).
   - knowledge-base and data-source listings follow nextToken pagination.
   - an exact data-source name match is preferred over the substring match.
 * Hunk sizes and the diffstat are updated; blob ids on the index lines were
   not recomputed.

 .github/workflows/KB_Updater.yml |  32 ++++++
 .tools/lambda/KB_Updater.py      | 208 +++++++++++++++++++++++++++++++
 2 files changed, 240 insertions(+)
 create mode 100644 .github/workflows/KB_Updater.yml
 create mode 100644 .tools/lambda/KB_Updater.py

diff --git a/.github/workflows/KB_Updater.yml b/.github/workflows/KB_Updater.yml
new file mode 100644
index 00000000000..46f9db9fe45
--- /dev/null
+++ b/.github/workflows/KB_Updater.yml
@@ -0,0 +1,32 @@
+name: Deploy KB_Updater Lambda Function
+on:
+  push:
+    branches: ["main"]
+    paths:
+      - '.tools/lambda/KB_Updater.py'
+  workflow_dispatch:
+
+permissions:
+  id-token: write  # request the OIDC token used to assume the AWS role
+  contents: read   # needed by actions/checkout
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v5
+        with:
+          role-to-assume: ${{ secrets.AWS_ASSUME_ROLE }}
+          aws-region: us-west-2
+
+      - name: Deploy Lambda
+        run: |
+          cd .tools/lambda
+          zip function.zip KB_Updater.py
+          aws lambda update-function-code \
+            --function-name KB_Updater \
+            --zip-file fileb://function.zip
diff --git a/.tools/lambda/KB_Updater.py b/.tools/lambda/KB_Updater.py
new file mode 100644
index 00000000000..f7b69df4a31
--- /dev/null
+++ b/.tools/lambda/KB_Updater.py
@@ -0,0 +1,208 @@
+import boto3
+import json
+import datetime
+import time
+
+# Document sets stored in "<name>-bucket" and indexed by "<name>-KB". Any other
+# value is treated as an SDK language backed by "<language>-premium-*" resources.
+NON_PREMIUM_SOURCES = ("steering-docs", "final-specs", "coding-standards")
+
+# Ingestion polling budget: MAX_POLL_ATTEMPTS checks, POLL_INTERVAL_SECONDS apart.
+# NOTE(review): worst case is ~500 s of polling; confirm the Lambda timeout allows it.
+POLL_INTERVAL_SECONDS = 5
+MAX_POLL_ATTEMPTS = 100
+
+
+class DateTimeEncoder(json.JSONEncoder):
+    """JSON encoder that writes datetime values as ISO 8601 strings."""
+
+    def default(self, obj):
+        if isinstance(obj, datetime.datetime):
+            return obj.isoformat()
+        return super().default(obj)
+
+
+def _list_all(list_call, result_key, **kwargs):
+    """Collect every item of a paginated list_* call by following nextToken."""
+    items = []
+    while True:
+        response = list_call(**kwargs)
+        items.extend(response[result_key])
+        next_token = response.get('nextToken')
+        if not next_token:
+            return items
+        kwargs['nextToken'] = next_token
+
+
+def get_knowledge_base_id(knowledge_base_name, region_name, bedrock_agent):
+    """Return the ID of the knowledge base named knowledge_base_name.
+
+    region_name is unused (the client is already bound to a region); it is
+    kept so existing callers keep working.
+
+    Raises:
+        ValueError: if no knowledge base has that exact name.
+    """
+    summaries = _list_all(bedrock_agent.list_knowledge_bases, 'knowledgeBaseSummaries')
+    for kb in summaries:
+        if kb['name'] == knowledge_base_name:
+            return kb['knowledgeBaseId']
+    raise ValueError(f"Knowledge base '{knowledge_base_name}' not found")
+
+
+def get_or_create_data_source(knowledge_base_id, language, region_name, bedrock_agent):
+    """Find the data source for language in the knowledge base, or create it.
+
+    Returns:
+        tuple: (data_source_id, data_source_name, created), where created is
+        True only when this call created the data source.
+    """
+    if language in NON_PREMIUM_SOURCES:
+        ds_name = f"{language}-data-source"
+        bucket_name = f"{language}-bucket"
+    else:
+        ds_name = f"{language}-premium-data-source"
+        bucket_name = f"{language}-premium-bucket"
+
+    data_sources = _list_all(
+        bedrock_agent.list_data_sources,
+        'dataSourceSummaries',
+        knowledgeBaseId=knowledge_base_id,
+    )
+
+    # Prefer the exact name this function creates, so the result does not depend
+    # on listing order; otherwise keep the original loose substring match.
+    for ds in data_sources:
+        if ds['name'] == ds_name:
+            return ds['dataSourceId'], ds['name'], False  # Found existing
+    for ds in data_sources:
+        if language in ds['name'] and ds['name'] != "default":
+            return ds['dataSourceId'], ds['name'], False  # Found existing
+
+    # Create new data source if none found
+    response = bedrock_agent.create_data_source(
+        knowledgeBaseId=knowledge_base_id,
+        name=ds_name,
+        dataSourceConfiguration={
+            "type": "S3",
+            "s3Configuration": {
+                "bucketArn": f"arn:aws:s3:::{bucket_name}"
+            }
+        },
+        vectorIngestionConfiguration={
+            "chunkingConfiguration": {
+                "chunkingStrategy": "HIERARCHICAL",
+                "hierarchicalChunkingConfiguration": {
+                    "levelConfigurations": [
+                        {"maxTokens": 1500},
+                        {"maxTokens": 300}
+                    ],
+                    "overlapTokens": 75
+                }
+            }
+        }
+    )
+    return response['dataSource']['dataSourceId'], response['dataSource']['name'], True  # Created new
+
+
+def sync_data_source(knowledge_base_id, data_source_id, region_name, bedrock_agent):
+    """Start an ingestion job for the data source and return the API response."""
+    return bedrock_agent.start_ingestion_job(
+        knowledgeBaseId=knowledge_base_id,
+        dataSourceId=data_source_id
+    )
+
+
+def monitor_ingestion_job(knowledge_base_id, data_source_id, ingestion_job_id, region_name, bedrock_agent):
+    """Poll an ingestion job until it finishes or the polling budget runs out.
+
+    Returns the get_ingestion_job response once the status is COMPLETE, FAILED
+    or STOPPED. On timeout, returns a dict with the same 'ingestionJob' shape
+    and status TIMEOUT; the top-level 'status' and 'message' keys are kept for
+    callers of the earlier return format.
+    """
+    for _ in range(MAX_POLL_ATTEMPTS):
+        job_status = bedrock_agent.get_ingestion_job(
+            knowledgeBaseId=knowledge_base_id,
+            dataSourceId=data_source_id,
+            ingestionJobId=ingestion_job_id
+        )
+
+        status = job_status['ingestionJob']['status']
+        print(f"Current status: {status} - {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+
+        if status in ('COMPLETE', 'FAILED', 'STOPPED'):
+            return job_status
+
+        time.sleep(POLL_INTERVAL_SECONDS)
+
+    waited = MAX_POLL_ATTEMPTS * POLL_INTERVAL_SECONDS
+    return {
+        "ingestionJob": {"status": "TIMEOUT"},
+        "status": "TIMEOUT",
+        "message": f"Job monitoring timed out after {waited} seconds"
+    }
+
+
+def lambda_handler(event, context):
+    """Sync the data source for event['language'] and report the outcome.
+
+    Event keys:
+        language: SDK language or document set name (default 'python').
+        region_name: AWS region (default 'us-west-2'); 'region' is accepted
+            as an alias because the sync workflow payload uses that key.
+    """
+    language = event.get('language', 'python')
+    region_name = event.get('region_name') or event.get('region') or 'us-west-2'
+    if language in NON_PREMIUM_SOURCES:
+        knowledge_base_name = f"{language}-KB"
+    else:
+        knowledge_base_name = f"{language}-premium-KB"
+
+    bedrock_agent = boto3.client('bedrock-agent', region_name=region_name)
+
+    knowledge_base_id = get_knowledge_base_id(knowledge_base_name, region_name, bedrock_agent)
+
+    # Get or create data source
+    data_source_id, data_source_name, is_new = get_or_create_data_source(
+        knowledge_base_id, language, region_name, bedrock_agent
+    )
+
+    results = {
+        "data_source": {
+            "id": data_source_id,
+            "name": data_source_name,
+            "is_new": is_new
+        },
+        "ingestion_job": None,
+        "statistics": None
+    }
+
+    # Sync the data source
+    print(f"Syncing data source {data_source_name}...")
+    sync_result = sync_data_source(knowledge_base_id, data_source_id, region_name, bedrock_agent)
+
+    ingestion_job_id = sync_result['ingestionJob']['ingestionJobId']
+    results["ingestion_job"] = {"id": ingestion_job_id, "status": "STARTED"}
+
+    # Monitor the ingestion job
+    final_status = monitor_ingestion_job(
+        knowledge_base_id, data_source_id, ingestion_job_id, region_name, bedrock_agent
+    )
+
+    results["ingestion_job"]["status"] = final_status.get('ingestionJob', {}).get('status', 'UNKNOWN')
+
+    # Get statistics
+    if 'statistics' in final_status.get('ingestionJob', {}):
+        stats = final_status['ingestionJob']['statistics']
+        results["statistics"] = {
+            "documents_processed": stats.get('numberOfDocumentsScanned', 0),
+            "documents_failed": stats.get('numberOfDocumentsFailed', 0),
+            "documents_indexed": stats.get('numberOfNewDocumentsIndexed', 0),
+            "documents_modified_indexed": stats.get('numberOfModifiedDocumentsIndexed',0)
+        }
+
+    return {
+        'statusCode': 200,
+        'body': json.dumps(results, cls=DateTimeEncoder)
+    }

From a33fdae2e73a7528e22bdd1b2b811a9a6684c615 Mon Sep 17 00:00:00 2001
From: tejasgn <“tejasgn@amazon.com”>
Date: Tue, 16 Dec 2025 12:54:03 -0800
Subject: [PATCH 11/11] fixing validation errors

---
 .tools/lambda/KB_Updater.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.tools/lambda/KB_Updater.py b/.tools/lambda/KB_Updater.py
index f7b69df4a31..984bda915a3 100644
--- a/.tools/lambda/KB_Updater.py
+++ b/.tools/lambda/KB_Updater.py
@@ -1,3 +1,6 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
 import boto3
 import json
 import datetime
@@ -174,7 +177,7 @@ def lambda_handler(event, context):
             "name": data_source_name,
             "is_new": is_new
         },
-        "ingestion_job": None,
+        "ingestion_job": {},
         "statistics": None
     }
 