"""Validate that GitHub usernames in ``teams.yaml`` still map to the same user IDs.

On GitHub, users are allowed to rename themselves. This script guards against
that situation, because (a) the new username should be used and (b) another
user may co-opt the old username.

The known-good mapping of username -> numeric user ID is kept in
``user-ids.yaml``. Each run looks up the current ID of every member listed in
``teams.yaml`` and fails if it differs from the recorded one.

Usage::

    # Validate only (run in CI before synchronizing teams):
    python validate-user-ids.py

    # Rewrite user-ids.yaml so it matches the members in teams.yaml:
    python validate-user-ids.py --sync

The ``GH_TOKEN`` environment variable must hold a GitHub token (no added
roles/permissions are needed). PyYAML must be installed.
"""

import argparse
import json
import os
import sys
import urllib.error
import urllib.parse
import urllib.request

try:
    import yaml
except ImportError:  # Reported with a clear message in main().
    yaml = None

# Seconds to wait for the GitHub API before giving up, so a stalled
# connection cannot hang a CI job indefinitely.
REQUEST_TIMEOUT = 30


def fetch_user_id(username, token):
    """Return the numeric GitHub user ID for ``username``.

    Parameters
    ----------
    username : str
        GitHub login to look up.
    token : str
        GitHub token, sent as a ``Bearer`` authorization header.

    Returns
    -------
    The ``id`` field of the GitHub ``/users/{username}`` response.

    On any failure (HTTP error, network error, malformed response) an error
    is printed to stderr and the process exits with status 1.
    """
    # Quote the login so that unexpected characters in teams.yaml cannot
    # alter the request path.
    quoted = urllib.parse.quote(str(username), safe="")
    url = f"https://api.github.com/users/{quoted}"
    req = urllib.request.Request(url)
    req.add_header("Authorization", f"Bearer {token}")
    req.add_header("Accept", "application/vnd.github.v3+json")
    req.add_header("User-Agent", "scientific-python-sync")

    try:
        with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as response:
            data = json.loads(response.read().decode())
        return data["id"]
    except urllib.error.HTTPError as e:
        print(f"Error fetching {username}: {e.code} {e.reason}", file=sys.stderr)
        if e.code == 404:
            # A 404 is what a renamed (and not yet re-registered) login
            # looks like, so point the operator at the likely cause.
            print(
                f"  `{username}` does not exist on GitHub; the account may have "
                "been renamed or deleted. Update teams.yaml accordingly.",
                file=sys.stderr,
            )
        sys.exit(1)
    except Exception as e:
        # Top-level CLI boundary: report anything else and stop.
        print(f"Error fetching {username}: {e}", file=sys.stderr)
        sys.exit(1)


def collect_members(teams):
    """Return the set of all members across ``teams``.

    ``teams`` is the parsed content of ``teams.yaml``: a list of mappings,
    each with an optional ``members`` list. An empty file (``None``) and
    teams whose ``members`` key is missing or null contribute no members.
    """
    return {
        member
        for team in teams or []
        for member in team.get("members") or []
    }


def format_user_ids(user_ids):
    """Render a ``{username: id}`` mapping as the text of ``user-ids.yaml``.

    Entries are sorted by username. Names are always written as
    double-quoted scalars so that logins such as ``123``, ``no`` or ``null``
    are read back as strings instead of being converted to an int, bool or
    null by the YAML parser.
    """
    lines = []
    for username in sorted(user_ids):
        lines.append(f"- name: {json.dumps(username)}\n")
        lines.append(f"  id: {user_ids[username]}\n")
    return "".join(lines)


def main():
    """Validate (or, with ``--sync``, regenerate) ``user-ids.yaml``.

    Exits with status 1 when a file cannot be loaded, ``GH_TOKEN`` is unset,
    a lookup fails, a recorded ID differs from the fetched one, or (without
    ``--sync``) the member lists of ``teams.yaml`` and ``user-ids.yaml``
    differ.
    """
    parser = argparse.ArgumentParser(
        description="Validate GitHub user IDs in user-ids.yaml"
    )
    parser.add_argument(
        "--sync",
        action="store_true",
        help="Add missing, remove extra usernames in user-ids.yaml to match teams.yaml",
    )
    args = parser.parse_args()

    if yaml is None:
        print(
            "Error: PyYAML is required; install it with `python -m pip install pyyaml`.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Load teams
    try:
        with open("teams.yaml", "r", encoding="utf-8") as f:
            teams = yaml.safe_load(f)
    except Exception as e:
        print(f"Failed to load teams.yaml: {e}", file=sys.stderr)
        sys.exit(1)

    all_members = collect_members(teams)

    # Load the recorded username -> ID mapping; a missing file simply means
    # nothing has been recorded yet.
    try:
        with open("user-ids.yaml", "r", encoding="utf-8") as f:
            captured_user_ids = yaml.safe_load(f) or []
    except FileNotFoundError:
        captured_user_ids = []
    except yaml.YAMLError as e:
        print(f"Failed to load user-ids.yaml: {e}", file=sys.stderr)
        sys.exit(1)

    user_id_map = {entry["name"]: entry.get("id") for entry in captured_user_ids}

    token = os.environ.get("GH_TOKEN")
    if not token:
        print("Error: GH_TOKEN environment variable is not set.", file=sys.stderr)
        print(
            "       Generate a token with no added roles/permissions.", file=sys.stderr
        )
        sys.exit(1)

    # NOTE: IDs found in the cache are trusted without re-querying GitHub;
    # delete the cache file to force fresh lookups.
    cache_file = ".user-id-cache.json"
    try:
        with open(cache_file, encoding="utf-8") as f:
            cache = json.load(f)
        print(f"Loaded cache from {cache_file}.")
    except FileNotFoundError:
        cache = {}
        print(f"No cache found; will create {cache_file}.")
    except json.JSONDecodeError as e:
        # A truncated or corrupt cache must not block validation.
        cache = {}
        print(
            f"Ignoring unreadable cache {cache_file} ({e}); will recreate it.",
            file=sys.stderr,
        )

    # Fetch and match IDs
    fetched_ids = {}
    for username in sorted(all_members):
        if username in cache:
            fetched_ids[username] = cache[username]
        else:
            print(f"Fetching ID for {username}...")
            fetched_ids[username] = fetch_user_id(username, token)
            cache[username] = fetched_ids[username]
            # Persist after every lookup so an aborted run keeps its progress.
            with open(cache_file, "w", encoding="utf-8") as f:
                json.dump(cache, f)

        current_id = user_id_map.get(username)
        if username in user_id_map and current_id != fetched_ids[username]:
            print(
                f"Error: ID for `{username}` differs! Existing: `{current_id}`, fetched: `{fetched_ids[username]}`.",
                file=sys.stderr,
            )
            print(
                "This may indicate a username was reused. Remove the user from user-ids.yaml and re-run with --sync.",
                file=sys.stderr,
            )
            sys.exit(1)

    missing_members = all_members - user_id_map.keys()
    extra_members = user_id_map.keys() - all_members

    if missing_members:
        print("The following users are in teams.yaml but missing from user-ids.yaml:")
        for m in sorted(missing_members):
            print(f"  - {m}")
    if extra_members:
        print("The following users are in user-ids.yaml but not in teams.yaml:")
        for m in sorted(extra_members):
            print(f"  - {m}")

    if args.sync:
        for m in sorted(missing_members):
            print(f"Adding {m}")
        # fetched_ids holds exactly the members of teams.yaml, so writing it
        # out both adds the missing entries and drops the extra ones.
        with open("user-ids.yaml", "w", encoding="utf-8") as f:
            f.write(format_user_ids(fetched_ids))
        for m in sorted(extra_members):
            print(f"Removed {m}")
        print("Successfully updated user-ids.yaml")
    else:
        if missing_members or extra_members:
            print(
                "\nRun validate-user-ids.py --sync locally to update user-ids.yaml.",
                file=sys.stderr,
            )
            sys.exit(1)
        print("Validation successful. No user IDs changed.")


if __name__ == "__main__":
    main()