From ee243ad12350769f1aa39253929804b40f69dfef Mon Sep 17 00:00:00 2001 From: Shai Almog <67850168+shai-almog@users.noreply.github.com> Date: Wed, 6 May 2026 10:47:36 +0300 Subject: [PATCH 01/17] Syndicate Hugo blog posts to dev.to and Hashnode Daily GitHub Action that picks the oldest blog post under docs/website/content/blog dated after 2026-04-30, at least 7 days old, and not yet syndicated to a given platform. The script absolutizes relative links/images, inserts a one-sentence "What is Codename One" blurb after the fold, and POSTs to each platform with canonical_url pointing back to the original on www.codenameone.com. Per-platform state in scripts/website/syndication-state.json so partial failures retry only the failed side. Requires repo secrets: DEVTO_API_KEY, HASHNODE_TOKEN, HASHNODE_PUBLICATION_ID. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/blog-syndication.yml | 62 ++++ scripts/website/syndicate_blog_posts.py | 449 ++++++++++++++++++++++++ scripts/website/syndication-state.json | 4 + 3 files changed, 515 insertions(+) create mode 100644 .github/workflows/blog-syndication.yml create mode 100755 scripts/website/syndicate_blog_posts.py create mode 100644 scripts/website/syndication-state.json diff --git a/.github/workflows/blog-syndication.yml b/.github/workflows/blog-syndication.yml new file mode 100644 index 0000000000..741895258e --- /dev/null +++ b/.github/workflows/blog-syndication.yml @@ -0,0 +1,62 @@ +name: Syndicate Blog Posts + +on: + schedule: + # Daily at 13:00 UTC. Runs from the default branch only, per GitHub's cron rules. + - cron: '0 13 * * *' + workflow_dispatch: + inputs: + dry_run: + description: 'Skip API calls and only print what would happen.' 
+ type: boolean + default: false + +permissions: + contents: write + +concurrency: + group: blog-syndication + cancel-in-progress: false + +jobs: + syndicate: + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v4 + with: + # Token with write scope so the post-run commit can push the state file. + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Run syndication script + id: syndicate + env: + DEVTO_API_KEY: ${{ secrets.DEVTO_API_KEY }} + HASHNODE_TOKEN: ${{ secrets.HASHNODE_TOKEN }} + HASHNODE_PUBLICATION_ID: ${{ secrets.HASHNODE_PUBLICATION_ID }} + run: | + set -euo pipefail + if [ "${{ inputs.dry_run }}" = "true" ]; then + python3 scripts/website/syndicate_blog_posts.py --dry-run + else + python3 scripts/website/syndicate_blog_posts.py + fi + + - name: Commit updated syndication state + if: ${{ inputs.dry_run != true }} + run: | + set -euo pipefail + if git diff --quiet -- scripts/website/syndication-state.json; then + echo "No state changes to commit." + exit 0 + fi + git config user.name 'github-actions[bot]' + git config user.email 'github-actions[bot]@users.noreply.github.com' + git add scripts/website/syndication-state.json + git commit -m "ci: record blog syndication results" + git push diff --git a/scripts/website/syndicate_blog_posts.py b/scripts/website/syndicate_blog_posts.py new file mode 100755 index 0000000000..404a66ba94 --- /dev/null +++ b/scripts/website/syndicate_blog_posts.py @@ -0,0 +1,449 @@ +#!/usr/bin/env python3 +"""Syndicate Codename One Hugo blog posts to dev.to and Hashnode. + +Selects the oldest blog post under ``docs/website/content/blog`` that: + +* has a ``date`` strictly after the eligibility floor (default: 2026-04-30), +* is at least ``--min-age-days`` old (default: 7), +* has not yet been syndicated to a given target platform. 
+ +For each unsyndicated platform on the chosen post the script POSTs the +content with ``canonical_url`` pointing back at the original on +``www.codenameone.com`` and records the resulting URL / id in +``scripts/website/syndication-state.json``. + +Designed to run from a daily GitHub Action with only the Python standard +library available. +""" + +from __future__ import annotations + +import argparse +import datetime as dt +import json +import os +import re +import sys +import urllib.error +import urllib.request +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + + +REPO_ROOT = Path(__file__).resolve().parents[2] +BLOG_DIR = REPO_ROOT / "docs" / "website" / "content" / "blog" +STATE_FILE = REPO_ROOT / "scripts" / "website" / "syndication-state.json" +SITE_BASE_URL = "https://www.codenameone.com" + +ELIGIBILITY_FLOOR = dt.date(2026, 4, 30) # posts must be strictly newer than this +MIN_AGE_DAYS = 7 + +CN1_BLURB = ( + "> **What is Codename One?** Codename One is an open-source framework for " + "building native iOS, Android, desktop, and web apps from a single Java " + "or Kotlin codebase. Learn more at " + "[codenameone.com](https://www.codenameone.com/)." 
+) + +DEVTO_TAGS = ["java", "mobile", "android", "ios"] +HASHNODE_TAGS = [ + {"slug": "java", "name": "Java"}, + {"slug": "mobile", "name": "Mobile"}, + {"slug": "android", "name": "Android"}, + {"slug": "ios", "name": "iOS"}, +] + + +@dataclass +class Post: + path: Path + slug: str + title: str + date: dt.date + front_matter: dict[str, Any] + body: str + + @property + def canonical_url(self) -> str: + url_field = self.front_matter.get("url") + if isinstance(url_field, str) and url_field.startswith("/"): + return f"{SITE_BASE_URL}{url_field}" + return f"{SITE_BASE_URL}/blog/{self.slug}/" + + @property + def cover_image(self) -> str | None: + match = re.search(r"!\[[^\]]*\]\((/blog/[^)\s]+)\)", self.body) + if match: + return f"{SITE_BASE_URL}{match.group(1)}" + return None + + +@dataclass +class State: + raw: dict[str, Any] = field(default_factory=dict) + + @classmethod + def load(cls, path: Path) -> "State": + if not path.exists(): + return cls(raw={"posts": {}}) + with path.open("r", encoding="utf-8") as handle: + data = json.load(handle) + if "posts" not in data or not isinstance(data["posts"], dict): + data["posts"] = {} + return cls(raw=data) + + def save(self, path: Path) -> None: + ordered = {key: self.raw[key] for key in ("_comment", "posts") if key in self.raw} + for key, value in self.raw.items(): + if key not in ordered: + ordered[key] = value + with path.open("w", encoding="utf-8") as handle: + json.dump(ordered, handle, indent=2, sort_keys=False) + handle.write("\n") + + def is_syndicated(self, slug: str, platform: str) -> bool: + post = self.raw["posts"].get(slug) + if not post: + return False + entry = post.get(platform) + return bool(entry and entry.get("url")) + + def record(self, slug: str, platform: str, payload: dict[str, Any]) -> None: + post = self.raw["posts"].setdefault(slug, {}) + post[platform] = payload + + +def parse_front_matter(text: str) -> tuple[dict[str, Any], str]: + """Parse the small subset of YAML front matter the blog uses. 
+ + The site's posts use simple ``key: value`` pairs (no nesting, no lists), + so a hand-rolled parser keeps this script dependency-free. + """ + if not text.startswith("---\n"): + raise ValueError("missing front matter") + end = text.find("\n---\n", 4) + if end == -1: + raise ValueError("unterminated front matter") + block = text[4:end] + body = text[end + len("\n---\n") :] + + fm: dict[str, Any] = {} + current_key: str | None = None + current_lines: list[str] | None = None + + for raw_line in block.splitlines(): + if current_key is not None and (raw_line.startswith(" ") or raw_line.startswith("\t") or raw_line == ""): + current_lines.append(raw_line) + continue + if current_lines is not None and current_key is not None: + fm[current_key] = _coerce_scalar("\n".join(current_lines).strip()) + current_key = None + current_lines = None + + match = re.match(r"^([A-Za-z0-9_]+):\s*(.*)$", raw_line) + if not match: + continue + key, value = match.group(1), match.group(2) + if value == "": + current_key = key + current_lines = [] + else: + fm[key] = _coerce_scalar(value) + + if current_lines is not None and current_key is not None: + fm[current_key] = _coerce_scalar("\n".join(current_lines).strip()) + + return fm, body + + +def _coerce_scalar(value: str) -> Any: + stripped = value.strip() + if len(stripped) >= 2 and stripped[0] == stripped[-1] and stripped[0] in ("'", '"'): + inner = stripped[1:-1] + if stripped[0] == "'": + inner = inner.replace("''", "'") + return inner + if stripped.lower() in ("true", "false"): + return stripped.lower() == "true" + return stripped + + +def parse_post(path: Path) -> Post | None: + text = path.read_text(encoding="utf-8") + try: + fm, body = parse_front_matter(text) + except ValueError: + return None + date_value = fm.get("date") + if not isinstance(date_value, str): + return None + try: + date = dt.date.fromisoformat(date_value[:10]) + except ValueError: + return None + slug = fm.get("slug") or path.stem + title = fm.get("title") or slug 
+ return Post(path=path, slug=slug, title=str(title), date=date, front_matter=fm, body=body) + + +def discover_posts(blog_dir: Path) -> list[Post]: + posts: list[Post] = [] + for path in sorted(blog_dir.glob("*.md")): + if path.name.startswith("_"): + continue + post = parse_post(path) + if post is not None: + posts.append(post) + posts.sort(key=lambda p: p.date) + return posts + + +def select_candidate( + posts: list[Post], + state: State, + platforms: list[str], + today: dt.date, + floor: dt.date, + min_age_days: int, +) -> Post | None: + cutoff = today - dt.timedelta(days=min_age_days) + for post in posts: + if post.date <= floor: + continue + if post.date > cutoff: + continue + if all(state.is_syndicated(post.slug, p) for p in platforms): + continue + return post + return None + + +_RELATIVE_LINK_RE = re.compile(r"(\]\()(/[^)\s]+)(\))") +_RELATIVE_IMG_RE = re.compile(r'(]*src=["\'])(/[^"\']+)(["\'])', re.IGNORECASE) + + +def absolutize_links(body: str) -> str: + body = _RELATIVE_LINK_RE.sub(lambda m: f"{m.group(1)}{SITE_BASE_URL}{m.group(2)}{m.group(3)}", body) + body = _RELATIVE_IMG_RE.sub(lambda m: f"{m.group(1)}{SITE_BASE_URL}{m.group(2)}{m.group(3)}", body) + return body + + +def insert_blurb(body: str, blurb: str) -> str: + """Insert ``blurb`` after the first non-image paragraph (i.e. 
after the fold).""" + lines = body.split("\n") + n = len(lines) + i = 0 + # skip leading blank lines + while i < n and lines[i].strip() == "": + i += 1 + # skip a leading header image (a paragraph that is just a markdown image) + if i < n and re.match(r"^!\[[^\]]*\]\([^)]+\)\s*$", lines[i].strip()): + i += 1 + while i < n and lines[i].strip() == "": + i += 1 + # skip the first paragraph of body text + while i < n and lines[i].strip() != "": + i += 1 + # i now points at the blank line (or EOF) following the first text paragraph + insertion = ["", blurb, ""] + return "\n".join(lines[:i] + insertion + lines[i:]) + + +def render_syndicated_body(post: Post) -> str: + body = post.body.strip("\n") + body = absolutize_links(body) + body = insert_blurb(body, CN1_BLURB) + return body + + +def http_post_json(url: str, headers: dict[str, str], payload: dict[str, Any]) -> dict[str, Any]: + data = json.dumps(payload).encode("utf-8") + request = urllib.request.Request(url, data=data, method="POST") + request.add_header("Content-Type", "application/json") + for key, value in headers.items(): + request.add_header(key, value) + try: + with urllib.request.urlopen(request, timeout=60) as response: + body = response.read().decode("utf-8") + except urllib.error.HTTPError as err: + detail = err.read().decode("utf-8", errors="replace") + raise RuntimeError(f"{url} returned HTTP {err.code}: {detail}") from err + if not body: + return {} + return json.loads(body) + + +def publish_to_devto(post: Post, body_markdown: str, api_key: str) -> dict[str, Any]: + payload: dict[str, Any] = { + "article": { + "title": post.title, + "body_markdown": body_markdown, + "published": True, + "canonical_url": post.canonical_url, + "tags": DEVTO_TAGS, + "description": str(post.front_matter.get("description") or "")[:250] or None, + } + } + cover = post.cover_image + if cover: + payload["article"]["main_image"] = cover + payload["article"] = {k: v for k, v in payload["article"].items() if v is not None} + + 
response = http_post_json( + "https://dev.to/api/articles", + headers={"api-key": api_key, "Accept": "application/vnd.forem.api-v1+json"}, + payload=payload, + ) + return { + "id": response.get("id"), + "url": response.get("url") or response.get("canonical_url"), + "syndicated_at": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"), + } + + +def publish_to_hashnode(post: Post, body_markdown: str, token: str, publication_id: str) -> dict[str, Any]: + mutation = """ + mutation PublishPost($input: PublishPostInput!) { + publishPost(input: $input) { + post { id slug url } + } + } + """.strip() + + input_obj: dict[str, Any] = { + "title": post.title, + "contentMarkdown": body_markdown, + "publicationId": publication_id, + "tags": HASHNODE_TAGS, + "originalArticleURL": post.canonical_url, + } + cover = post.cover_image + if cover: + input_obj["coverImageOptions"] = {"coverImageURL": cover} + subtitle = str(post.front_matter.get("description") or "").strip() + if subtitle: + input_obj["subtitle"] = subtitle[:250] + + response = http_post_json( + "https://gql.hashnode.com", + headers={"Authorization": token}, + payload={"query": mutation, "variables": {"input": input_obj}}, + ) + if response.get("errors"): + raise RuntimeError(f"hashnode GraphQL errors: {response['errors']}") + published = (response.get("data") or {}).get("publishPost", {}).get("post", {}) + return { + "id": published.get("id"), + "url": published.get("url"), + "syndicated_at": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"), + } + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--dry-run", action="store_true", help="Do not call any APIs; print what would happen.") + parser.add_argument( + "--platforms", + default="devto,hashnode", + help="Comma-separated subset of platforms to consider (default: devto,hashnode).", + ) + parser.add_argument( + "--today", + default=None, + help="Override 
today's date (YYYY-MM-DD). Useful for testing.", + ) + parser.add_argument( + "--floor", + default=ELIGIBILITY_FLOOR.isoformat(), + help=f"Posts must be dated strictly after this date (default: {ELIGIBILITY_FLOOR.isoformat()}).", + ) + parser.add_argument( + "--min-age-days", + type=int, + default=MIN_AGE_DAYS, + help=f"Minimum post age in days before syndicating (default: {MIN_AGE_DAYS}).", + ) + parser.add_argument( + "--blog-dir", + default=str(BLOG_DIR), + help="Directory containing Hugo blog posts.", + ) + parser.add_argument( + "--state-file", + default=str(STATE_FILE), + help="Path to syndication state JSON.", + ) + return parser.parse_args(argv) + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + today = dt.date.fromisoformat(args.today) if args.today else dt.date.today() + floor = dt.date.fromisoformat(args.floor) + platforms = [p.strip() for p in args.platforms.split(",") if p.strip()] + blog_dir = Path(args.blog_dir) + state_file = Path(args.state_file) + + posts = discover_posts(blog_dir) + state = State.load(state_file) + candidate = select_candidate(posts, state, platforms, today, floor, args.min_age_days) + if candidate is None: + print("No syndication candidate found today.") + return 0 + + print(f"Selected post: {candidate.slug} (date={candidate.date.isoformat()})") + body_markdown = render_syndicated_body(candidate) + + devto_key = os.environ.get("DEVTO_API_KEY", "") + hashnode_token = os.environ.get("HASHNODE_TOKEN", "") + hashnode_publication = os.environ.get("HASHNODE_PUBLICATION_ID", "") + + any_change = False + failures: list[str] = [] + + for platform in platforms: + if state.is_syndicated(candidate.slug, platform): + print(f" [{platform}] already syndicated; skipping.") + continue + if args.dry_run: + print(f" [{platform}] dry run — would publish {len(body_markdown)} chars, canonical {candidate.canonical_url}") + continue + try: + if platform == "devto": + if not devto_key: + raise RuntimeError("DEVTO_API_KEY is not set") + 
result = publish_to_devto(candidate, body_markdown, devto_key) + elif platform == "hashnode": + if not hashnode_token: + raise RuntimeError("HASHNODE_TOKEN is not set") + if not hashnode_publication: + raise RuntimeError("HASHNODE_PUBLICATION_ID is not set") + result = publish_to_hashnode(candidate, body_markdown, hashnode_token, hashnode_publication) + else: + raise RuntimeError(f"unknown platform: {platform}") + except Exception as err: # noqa: BLE001 — surface any failure as per-platform + print(f" [{platform}] FAILED: {err}", file=sys.stderr) + failures.append(platform) + continue + + if not result.get("url"): + print(f" [{platform}] response missing URL: {result}", file=sys.stderr) + failures.append(platform) + continue + + state.record(candidate.slug, platform, result) + any_change = True + print(f" [{platform}] published: {result['url']}") + + if any_change: + state.save(state_file) + print(f"Updated state file: {state_file}") + + if failures: + return 2 + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/scripts/website/syndication-state.json b/scripts/website/syndication-state.json new file mode 100644 index 0000000000..a3a29cbb02 --- /dev/null +++ b/scripts/website/syndication-state.json @@ -0,0 +1,4 @@ +{ + "_comment": "Tracks blog posts syndicated by scripts/website/syndicate_blog_posts.py. Keyed by post slug. Each platform sub-object records the remote URL/id and ISO timestamp once syndication succeeds.", + "posts": {} +} From 30cdd5d56b4ca16135614ca44e36d525b0543ae5 Mon Sep 17 00:00:00 2001 From: Shai Almog <67850168+shai-almog@users.noreply.github.com> Date: Wed, 6 May 2026 12:20:45 +0300 Subject: [PATCH 02/17] Syndicate blog posts to foojay.io as draft Adds foojay.io as a third syndication target. Unlike dev.to and Hashnode the foojay flow creates a WP draft via /wp-json/wp/v2/posts so the foojay editors can review before publishing. 
The canonical link is surfaced as a visible note at the top of the draft (rather than a meta field) so the reviewer can wire it up using whichever SEO plugin foojay runs. Side effects: - platforms with missing credentials are now skipped at startup with a note instead of failing the whole run, so adding a new platform later does not strand the candidate selector - requests now send a real User-Agent and Accept header (Cloudflare in front of foojay rejected the default Python-urllib UA with error 1010) - foojay credentials (FOOJAY_USER / FOOJAY_PASSWORD) wired through the workflow as optional secrets; the script auto-skips foojay until both are configured Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/blog-syndication.yml | 5 ++ scripts/website/syndicate_blog_posts.py | 107 ++++++++++++++++++++---- 2 files changed, 97 insertions(+), 15 deletions(-) diff --git a/.github/workflows/blog-syndication.yml b/.github/workflows/blog-syndication.yml index 741895258e..29f27f8fb6 100644 --- a/.github/workflows/blog-syndication.yml +++ b/.github/workflows/blog-syndication.yml @@ -39,6 +39,11 @@ jobs: DEVTO_API_KEY: ${{ secrets.DEVTO_API_KEY }} HASHNODE_TOKEN: ${{ secrets.HASHNODE_TOKEN }} HASHNODE_PUBLICATION_ID: ${{ secrets.HASHNODE_PUBLICATION_ID }} + # Optional: foojay submits a draft via WP REST. Add FOOJAY_USER / + # FOOJAY_PASSWORD as repo secrets to enable; until then the script + # automatically skips foojay. 
+ FOOJAY_USER: ${{ secrets.FOOJAY_USER }} + FOOJAY_PASSWORD: ${{ secrets.FOOJAY_PASSWORD }} run: | set -euo pipefail if [ "${{ inputs.dry_run }}" = "true" ]; then diff --git a/scripts/website/syndicate_blog_posts.py b/scripts/website/syndicate_blog_posts.py index 404a66ba94..223aed32ed 100755 --- a/scripts/website/syndicate_blog_posts.py +++ b/scripts/website/syndicate_blog_posts.py @@ -19,6 +19,7 @@ from __future__ import annotations import argparse +import base64 import datetime as dt import json import os @@ -54,6 +55,9 @@ {"slug": "ios", "name": "iOS"}, ] +FOOJAY_ENDPOINT = "https://foojay.io/wp-json/wp/v2/posts" +DEFAULT_PLATFORMS = "devto,hashnode,foojay" + @dataclass class Post: @@ -257,10 +261,15 @@ def render_syndicated_body(post: Post) -> str: return body +USER_AGENT = "CodenameOneBlogSyndicator/1.0 (+https://github.com/codenameone/CodenameOne)" + + def http_post_json(url: str, headers: dict[str, str], payload: dict[str, Any]) -> dict[str, Any]: data = json.dumps(payload).encode("utf-8") request = urllib.request.Request(url, data=data, method="POST") request.add_header("Content-Type", "application/json") + request.add_header("User-Agent", USER_AGENT) + request.add_header("Accept", "application/json") for key, value in headers.items(): request.add_header(key, value) try: @@ -302,6 +311,44 @@ def publish_to_devto(post: Post, body_markdown: str, api_key: str) -> dict[str, } +def publish_to_foojay(post: Post, body_markdown: str, user: str, password: str) -> dict[str, Any]: + """Create a draft post on foojay.io via the WordPress REST API. + + foojay editorial reviews and publishes the draft, so the canonical link is + surfaced as a visible note at the top of the post rather than wired into + an SEO plugin's meta field (which varies by site configuration). 
+ """ + canonical_note = ( + f"*Originally published on the [Codename One blog]({post.canonical_url}).*\n\n" + ) + excerpt = str(post.front_matter.get("description") or "").strip() + payload: dict[str, Any] = { + "title": post.title, + "content": canonical_note + body_markdown, + "status": "draft", + } + if excerpt: + payload["excerpt"] = excerpt[:500] + + creds = base64.b64encode(f"{user}:{password}".encode("utf-8")).decode("ascii") + response = http_post_json( + FOOJAY_ENDPOINT, + headers={"Authorization": f"Basic {creds}"}, + payload=payload, + ) + edit_link = None + raw_link = response.get("link") + if isinstance(response.get("id"), int): + edit_link = f"https://foojay.io/wp-admin/post.php?post={response['id']}&action=edit" + return { + "id": response.get("id"), + "url": edit_link or raw_link, + "preview_url": raw_link, + "status": response.get("status"), + "syndicated_at": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"), + } + + def publish_to_hashnode(post: Post, body_markdown: str, token: str, publication_id: str) -> dict[str, Any]: mutation = """ mutation PublishPost($input: PublishPostInput!) 
{ @@ -345,8 +392,8 @@ def parse_args(argv: list[str]) -> argparse.Namespace: parser.add_argument("--dry-run", action="store_true", help="Do not call any APIs; print what would happen.") parser.add_argument( "--platforms", - default="devto,hashnode", - help="Comma-separated subset of platforms to consider (default: devto,hashnode).", + default=DEFAULT_PLATFORMS, + help=f"Comma-separated subset of platforms to consider (default: {DEFAULT_PLATFORMS}).", ) parser.add_argument( "--today", @@ -377,14 +424,42 @@ def parse_args(argv: list[str]) -> argparse.Namespace: return parser.parse_args(argv) +def is_platform_configured(platform: str) -> bool: + if platform == "devto": + return bool(os.environ.get("DEVTO_API_KEY")) + if platform == "hashnode": + return bool(os.environ.get("HASHNODE_TOKEN") and os.environ.get("HASHNODE_PUBLICATION_ID")) + if platform == "foojay": + return bool(os.environ.get("FOOJAY_USER") and os.environ.get("FOOJAY_PASSWORD")) + return False + + def main(argv: list[str]) -> int: args = parse_args(argv) today = dt.date.fromisoformat(args.today) if args.today else dt.date.today() floor = dt.date.fromisoformat(args.floor) - platforms = [p.strip() for p in args.platforms.split(",") if p.strip()] + requested_platforms = [p.strip() for p in args.platforms.split(",") if p.strip()] blog_dir = Path(args.blog_dir) state_file = Path(args.state_file) + if args.dry_run: + platforms = requested_platforms + else: + platforms = [] + for platform in requested_platforms: + if is_platform_configured(platform): + platforms.append(platform) + else: + # Skipping an unconfigured platform here (instead of failing) keeps + # the candidate selector from getting stuck on a post that can never + # be fully syndicated. Once the missing creds appear, the next run + # picks up where this one left off. 
+ print(f"[{platform}] credentials not configured; skipping platform.") + + if not platforms: + print("No platforms are configured; nothing to do.") + return 0 + posts = discover_posts(blog_dir) state = State.load(state_file) candidate = select_candidate(posts, state, platforms, today, floor, args.min_age_days) @@ -395,10 +470,6 @@ def main(argv: list[str]) -> int: print(f"Selected post: {candidate.slug} (date={candidate.date.isoformat()})") body_markdown = render_syndicated_body(candidate) - devto_key = os.environ.get("DEVTO_API_KEY", "") - hashnode_token = os.environ.get("HASHNODE_TOKEN", "") - hashnode_publication = os.environ.get("HASHNODE_PUBLICATION_ID", "") - any_change = False failures: list[str] = [] @@ -411,15 +482,21 @@ def main(argv: list[str]) -> int: continue try: if platform == "devto": - if not devto_key: - raise RuntimeError("DEVTO_API_KEY is not set") - result = publish_to_devto(candidate, body_markdown, devto_key) + result = publish_to_devto(candidate, body_markdown, os.environ["DEVTO_API_KEY"]) elif platform == "hashnode": - if not hashnode_token: - raise RuntimeError("HASHNODE_TOKEN is not set") - if not hashnode_publication: - raise RuntimeError("HASHNODE_PUBLICATION_ID is not set") - result = publish_to_hashnode(candidate, body_markdown, hashnode_token, hashnode_publication) + result = publish_to_hashnode( + candidate, + body_markdown, + os.environ["HASHNODE_TOKEN"], + os.environ["HASHNODE_PUBLICATION_ID"], + ) + elif platform == "foojay": + result = publish_to_foojay( + candidate, + body_markdown, + os.environ["FOOJAY_USER"], + os.environ["FOOJAY_PASSWORD"], + ) else: raise RuntimeError(f"unknown platform: {platform}") except Exception as err: # noqa: BLE001 — surface any failure as per-platform From c3429a76b486a1c3ac12d26d481f5d25921d74cb Mon Sep 17 00:00:00 2001 From: Shai Almog <67850168+shai-almog@users.noreply.github.com> Date: Wed, 6 May 2026 12:21:53 +0300 Subject: [PATCH 03/17] Set foojay canonical via Yoast meta instead of body 
note MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit foojay runs Yoast SEO, so the canonical URL is now sent as meta._yoast_wpseo_canonical on the WP draft. Yoast registers that key as a REST-exposed post meta, so the standard /wp-json/wp/v2/posts payload carries it through. The visible "originally published" line at the top of the draft body is dropped — Yoast handles the SEO directive and the "What is Codename One" blurb still provides reader-facing attribution. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/website/syndicate_blog_posts.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/scripts/website/syndicate_blog_posts.py b/scripts/website/syndicate_blog_posts.py index 223aed32ed..b54de2f826 100755 --- a/scripts/website/syndicate_blog_posts.py +++ b/scripts/website/syndicate_blog_posts.py @@ -314,18 +314,16 @@ def publish_to_devto(post: Post, body_markdown: str, api_key: str) -> dict[str, def publish_to_foojay(post: Post, body_markdown: str, user: str, password: str) -> dict[str, Any]: """Create a draft post on foojay.io via the WordPress REST API. - foojay editorial reviews and publishes the draft, so the canonical link is - surfaced as a visible note at the top of the post rather than wired into - an SEO plugin's meta field (which varies by site configuration). + foojay editorial reviews and publishes the draft. foojay runs Yoast SEO, + which registers _yoast_wpseo_canonical as a REST-exposed post meta, so we + set the canonical there rather than as visible body text. 
""" - canonical_note = ( - f"*Originally published on the [Codename One blog]({post.canonical_url}).*\n\n" - ) excerpt = str(post.front_matter.get("description") or "").strip() payload: dict[str, Any] = { "title": post.title, - "content": canonical_note + body_markdown, + "content": body_markdown, "status": "draft", + "meta": {"_yoast_wpseo_canonical": post.canonical_url}, } if excerpt: payload["excerpt"] = excerpt[:500] From 4239586ac4585b9c92eb685c65d719f0a0a355fa Mon Sep 17 00:00:00 2001 From: Shai Almog <67850168+shai-almog@users.noreply.github.com> Date: Wed, 6 May 2026 12:27:11 +0300 Subject: [PATCH 04/17] Drop foojay syndication target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit foojay.io has Wordfence configured to disable WordPress Application Passwords, so there is no usable Basic Auth path for the WP REST API from the syndication script. Removing the foojay code path until / unless foojay editorial offers an alternative auth method (JWT, per-user API key, etc.). The User-Agent header and skip-when-unconfigured behaviour introduced alongside the foojay work are kept — they are useful for the remaining platforms. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/blog-syndication.yml | 5 --- scripts/website/syndicate_blog_posts.py | 49 +------------------------ 2 files changed, 1 insertion(+), 53 deletions(-) diff --git a/.github/workflows/blog-syndication.yml b/.github/workflows/blog-syndication.yml index 29f27f8fb6..741895258e 100644 --- a/.github/workflows/blog-syndication.yml +++ b/.github/workflows/blog-syndication.yml @@ -39,11 +39,6 @@ jobs: DEVTO_API_KEY: ${{ secrets.DEVTO_API_KEY }} HASHNODE_TOKEN: ${{ secrets.HASHNODE_TOKEN }} HASHNODE_PUBLICATION_ID: ${{ secrets.HASHNODE_PUBLICATION_ID }} - # Optional: foojay submits a draft via WP REST. Add FOOJAY_USER / - # FOOJAY_PASSWORD as repo secrets to enable; until then the script - # automatically skips foojay. 
- FOOJAY_USER: ${{ secrets.FOOJAY_USER }} - FOOJAY_PASSWORD: ${{ secrets.FOOJAY_PASSWORD }} run: | set -euo pipefail if [ "${{ inputs.dry_run }}" = "true" ]; then diff --git a/scripts/website/syndicate_blog_posts.py b/scripts/website/syndicate_blog_posts.py index b54de2f826..cb32d2627c 100755 --- a/scripts/website/syndicate_blog_posts.py +++ b/scripts/website/syndicate_blog_posts.py @@ -19,7 +19,6 @@ from __future__ import annotations import argparse -import base64 import datetime as dt import json import os @@ -55,8 +54,7 @@ {"slug": "ios", "name": "iOS"}, ] -FOOJAY_ENDPOINT = "https://foojay.io/wp-json/wp/v2/posts" -DEFAULT_PLATFORMS = "devto,hashnode,foojay" +DEFAULT_PLATFORMS = "devto,hashnode" @dataclass @@ -311,42 +309,6 @@ def publish_to_devto(post: Post, body_markdown: str, api_key: str) -> dict[str, } -def publish_to_foojay(post: Post, body_markdown: str, user: str, password: str) -> dict[str, Any]: - """Create a draft post on foojay.io via the WordPress REST API. - - foojay editorial reviews and publishes the draft. foojay runs Yoast SEO, - which registers _yoast_wpseo_canonical as a REST-exposed post meta, so we - set the canonical there rather than as visible body text. 
- """ - excerpt = str(post.front_matter.get("description") or "").strip() - payload: dict[str, Any] = { - "title": post.title, - "content": body_markdown, - "status": "draft", - "meta": {"_yoast_wpseo_canonical": post.canonical_url}, - } - if excerpt: - payload["excerpt"] = excerpt[:500] - - creds = base64.b64encode(f"{user}:{password}".encode("utf-8")).decode("ascii") - response = http_post_json( - FOOJAY_ENDPOINT, - headers={"Authorization": f"Basic {creds}"}, - payload=payload, - ) - edit_link = None - raw_link = response.get("link") - if isinstance(response.get("id"), int): - edit_link = f"https://foojay.io/wp-admin/post.php?post={response['id']}&action=edit" - return { - "id": response.get("id"), - "url": edit_link or raw_link, - "preview_url": raw_link, - "status": response.get("status"), - "syndicated_at": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"), - } - - def publish_to_hashnode(post: Post, body_markdown: str, token: str, publication_id: str) -> dict[str, Any]: mutation = """ mutation PublishPost($input: PublishPostInput!) 
{ @@ -427,8 +389,6 @@ def is_platform_configured(platform: str) -> bool: return bool(os.environ.get("DEVTO_API_KEY")) if platform == "hashnode": return bool(os.environ.get("HASHNODE_TOKEN") and os.environ.get("HASHNODE_PUBLICATION_ID")) - if platform == "foojay": - return bool(os.environ.get("FOOJAY_USER") and os.environ.get("FOOJAY_PASSWORD")) return False @@ -488,13 +448,6 @@ def main(argv: list[str]) -> int: os.environ["HASHNODE_TOKEN"], os.environ["HASHNODE_PUBLICATION_ID"], ) - elif platform == "foojay": - result = publish_to_foojay( - candidate, - body_markdown, - os.environ["FOOJAY_USER"], - os.environ["FOOJAY_PASSWORD"], - ) else: raise RuntimeError(f"unknown platform: {platform}") except Exception as err: # noqa: BLE001 — surface any failure as per-platform From d0ab980f5f615f8dae6fb29359cd948778a796e8 Mon Sep 17 00:00:00 2001 From: Shai Almog <67850168+shai-almog@users.noreply.github.com> Date: Wed, 6 May 2026 13:08:47 +0300 Subject: [PATCH 05/17] Browser-driven syndication for foojay, HackerNoon, DZone, Medium MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds scripts/website/syndicate_browser_posts.py — a Playwright-based counterpart to the API syndicator. Each target site has its own adapter (login + draft submission). State and post selection are shared with the API script via syndication-state.json, so a post is "candidate" until all configured platforms — API and browser — have a record. Adapters: - foojay: hybrid path. Playwright drives wp-login.php to obtain a real session (Wordfence has Application Passwords disabled, so token auth is out), then the script POSTs the draft via /wp-json/wp/v2/posts using the session cookies + X-WP-Nonce. Pure UI submission was attempted but Cloudflare in front of foojay challenges form POSTs and drops the payload, so drafts never landed. 
Yoast canonical isn't REST-writable on this Yoast install, so the canonical is surfaced as a visible note at the top of the draft body for the editor reviewer. Validated end- to-end against the live site (draft #123656). - hackernoon, dzone, medium: standard browser flow. Selectors are best-effort and need a one-time validation pass against each live site via --validate-only --headed. medium has no password login, so it relies on a base64-encoded MEDIUM_STORAGE_STATE secret exported from a manually logged-in browser session. Workflow additions: - Detects whether any browser-syndication secret is configured; only installs Playwright + Chromium when something will actually run. - Uploads the Playwright screenshot directory as a CI artifact on any outcome (kept for 14 days), so selector failures are debuggable. - Screenshots dir is gitignored. Per-platform secrets (all optional; missing = platform skipped): FOOJAY_USER, FOOJAY_PASSWORD HACKERNOON_USER, HACKERNOON_PASSWORD DZONE_USER, DZONE_PASSWORD MEDIUM_STORAGE_STATE Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/blog-syndication.yml | 52 +- .../syndication-screenshots/.gitignore | 2 + scripts/website/syndicate_browser_posts.py | 619 ++++++++++++++++++ 3 files changed, 671 insertions(+), 2 deletions(-) create mode 100644 docs/website/reports/syndication-screenshots/.gitignore create mode 100755 scripts/website/syndicate_browser_posts.py diff --git a/.github/workflows/blog-syndication.yml b/.github/workflows/blog-syndication.yml index 741895258e..3a22d8bb87 100644 --- a/.github/workflows/blog-syndication.yml +++ b/.github/workflows/blog-syndication.yml @@ -33,8 +33,8 @@ jobs: with: python-version: '3.12' - - name: Run syndication script - id: syndicate + - name: Run API syndication script + id: syndicate_api env: DEVTO_API_KEY: ${{ secrets.DEVTO_API_KEY }} HASHNODE_TOKEN: ${{ secrets.HASHNODE_TOKEN }} @@ -47,6 +47,54 @@ jobs: python3 scripts/website/syndicate_blog_posts.py fi + - name: Detect 
browser-syndication credentials + id: browser_creds + env: + FOOJAY_USER: ${{ secrets.FOOJAY_USER }} + HACKERNOON_USER: ${{ secrets.HACKERNOON_USER }} + DZONE_USER: ${{ secrets.DZONE_USER }} + MEDIUM_STORAGE_STATE: ${{ secrets.MEDIUM_STORAGE_STATE }} + run: | + if [ -n "${FOOJAY_USER}" ] || [ -n "${HACKERNOON_USER}" ] || [ -n "${DZONE_USER}" ] || [ -n "${MEDIUM_STORAGE_STATE}" ]; then + echo "any_configured=true" >> "${GITHUB_OUTPUT}" + else + echo "any_configured=false" >> "${GITHUB_OUTPUT}" + fi + + - name: Install Playwright dependencies + if: ${{ steps.browser_creds.outputs.any_configured == 'true' }} + run: | + set -euo pipefail + pip install playwright + playwright install --with-deps chromium + + - name: Run browser syndication script + if: ${{ steps.browser_creds.outputs.any_configured == 'true' }} + env: + FOOJAY_USER: ${{ secrets.FOOJAY_USER }} + FOOJAY_PASSWORD: ${{ secrets.FOOJAY_PASSWORD }} + HACKERNOON_USER: ${{ secrets.HACKERNOON_USER }} + HACKERNOON_PASSWORD: ${{ secrets.HACKERNOON_PASSWORD }} + DZONE_USER: ${{ secrets.DZONE_USER }} + DZONE_PASSWORD: ${{ secrets.DZONE_PASSWORD }} + MEDIUM_STORAGE_STATE: ${{ secrets.MEDIUM_STORAGE_STATE }} + run: | + set -euo pipefail + if [ "${{ inputs.dry_run }}" = "true" ]; then + python3 scripts/website/syndicate_browser_posts.py --dry-run + else + python3 scripts/website/syndicate_browser_posts.py + fi + + - name: Upload syndication screenshots on failure + if: ${{ always() && hashFiles('docs/website/reports/syndication-screenshots/**/*.png') != '' }} + uses: actions/upload-artifact@v4 + with: + name: syndication-screenshots + path: docs/website/reports/syndication-screenshots/ + if-no-files-found: ignore + retention-days: 14 + - name: Commit updated syndication state if: ${{ inputs.dry_run != true }} run: | diff --git a/docs/website/reports/syndication-screenshots/.gitignore b/docs/website/reports/syndication-screenshots/.gitignore new file mode 100644 index 0000000000..5b502f9bd1 --- /dev/null +++ 
b/docs/website/reports/syndication-screenshots/.gitignore @@ -0,0 +1,2 @@ +*.png +!.gitignore diff --git a/scripts/website/syndicate_browser_posts.py b/scripts/website/syndicate_browser_posts.py new file mode 100755 index 0000000000..ae0fe6bd84 --- /dev/null +++ b/scripts/website/syndicate_browser_posts.py @@ -0,0 +1,619 @@ +#!/usr/bin/env python3 +"""Syndicate Codename One Hugo blog posts to sites that have no usable API. + +Counterpart to ``syndicate_blog_posts.py``: instead of POSTing to a REST/ +GraphQL endpoint, this script drives a real (headless) browser via Playwright +and submits the post through the site's normal authoring UI as a draft for +editorial review. Shares ``Post`` discovery, body rendering, and the +``syndication-state.json`` state file with the API-based script. + +Adapters (one class per target site) live at the bottom of this file. Each +adapter exposes a ``login()`` and a ``submit_draft()`` step. Selectors are +kept as constants at the top of each adapter so they are easy to update when +the site changes its UI — which it will, so plan on it. 
+ +Usage: + + # First-time setup, watch the browser, take screenshots of the editor: + python3 scripts/website/syndicate_browser_posts.py \ + --platforms foojay --validate-only --headed --today 2026-05-08 + + # Real syndication (headless, daily-cron style): + python3 scripts/website/syndicate_browser_posts.py --platforms foojay,hackernoon + +Required env vars per platform (script auto-skips a platform when its creds +are missing, just like the API script): + + foojay : FOOJAY_USER, FOOJAY_PASSWORD + hackernoon : HACKERNOON_USER, HACKERNOON_PASSWORD + dzone : DZONE_USER, DZONE_PASSWORD + medium : MEDIUM_STORAGE_STATE (base64-encoded Playwright storageState + JSON exported from a logged-in session + — Medium has no password login flow) +""" + +from __future__ import annotations + +import argparse +import base64 +import datetime as dt +import json +import os +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Callable + +# Reuse the API-based script's discovery, body rendering, and state machinery. 
+sys.path.insert(0, str(Path(__file__).resolve().parent)) +from syndicate_blog_posts import ( # noqa: E402 (intentional path injection) + BLOG_DIR, + ELIGIBILITY_FLOOR, + MIN_AGE_DAYS, + Post, + STATE_FILE, + State, + discover_posts, + render_syndicated_body, + select_candidate, +) + + +SCREENSHOT_DIR = Path(__file__).resolve().parents[2] / "docs" / "website" / "reports" / "syndication-screenshots" +DEFAULT_PLATFORMS = "foojay,hackernoon,dzone,medium" + + +@dataclass +class AdapterContext: + post: Post + body_markdown: str + headed: bool + validate_only: bool + + +# --------------------------------------------------------------------------- # +# Adapters # +# --------------------------------------------------------------------------- # + + +class AdapterError(RuntimeError): + """Raised when an adapter cannot complete its flow.""" + + +def _find_first(page, selectors: list[str], *, timeout: int = 15000): + """Try each selector in turn; return the first that becomes visible. + + Adapters list multiple plausible selectors per field so a small UI tweak + on the target site does not break the run. The first match wins. + """ + last_error: Exception | None = None + for selector in selectors: + try: + handle = page.wait_for_selector(selector, timeout=timeout, state="visible") + if handle: + return handle + except Exception as err: # noqa: BLE001 — Playwright TimeoutError, etc. 
+ last_error = err + continue + raise AdapterError(f"none of the selectors matched: {selectors}: {last_error}") + + +def _save_screenshot(page, slug: str, label: str) -> Path: + SCREENSHOT_DIR.mkdir(parents=True, exist_ok=True) + stamp = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%dT%H%M%SZ") + path = SCREENSHOT_DIR / f"{slug}-{label}-{stamp}.png" + try: + page.screenshot(path=str(path), full_page=True) + except Exception: # noqa: BLE001 — never let a screenshot failure mask the real error + return path + return path + + +class FoojayAdapter: + """foojay.io — Playwright login + REST API draft creation. + + Pure UI submission to foojay does not work reliably: Cloudflare in front + of foojay challenges form POSTs to /wp-admin/post.php and drops the + form payload during the challenge, so the draft is never created. The + REST API is not subject to the same challenge, but Wordfence has + Application Passwords disabled, so token auth is also out. + + The working hybrid: drive wp-login.php with Playwright to obtain a real + user session (cookies), pull the WP REST nonce from /wp-admin/, then + POST the draft through /wp-json/wp/v2/posts with cookie + X-WP-Nonce + auth. Behaves "as a website user" end-to-end while sidestepping both + the app-password block and the Cloudflare POST challenge. 
+ """ + + name = "foojay" + LOGIN_URL = "https://foojay.io/wp-login.php" + REST_POSTS_ENDPOINT = "https://foojay.io/wp-json/wp/v2/posts" + + USER_SELECTORS = ["#user_login"] + PASSWORD_SELECTORS = ["#user_pass"] + SUBMIT_SELECTORS = ["#wp-submit"] + + @staticmethod + def is_configured() -> bool: + return bool(os.environ.get("FOOJAY_USER") and os.environ.get("FOOJAY_PASSWORD")) + + def login(self, page) -> None: + page.goto(self.LOGIN_URL, wait_until="domcontentloaded") + _find_first(page, self.USER_SELECTORS).fill(os.environ["FOOJAY_USER"]) + _find_first(page, self.PASSWORD_SELECTORS).fill(os.environ["FOOJAY_PASSWORD"]) + _find_first(page, self.SUBMIT_SELECTORS).click() + try: + page.wait_for_url("**/wp-admin/**", timeout=90000) + except Exception: # noqa: BLE001 + page.wait_for_selector("#wpadminbar", timeout=30000) + + def submit_draft(self, page, ctx: AdapterContext) -> dict[str, Any]: + # Land on wp-admin so wpApiSettings (which carries the nonce) is in scope. + page.goto("https://foojay.io/wp-admin/", wait_until="domcontentloaded", timeout=60000) + nonce = page.evaluate( + "() => (window.wpApiSettings && window.wpApiSettings.nonce) || null" + ) + if not nonce: + raise AdapterError("could not extract wpApiSettings.nonce from /wp-admin/") + + if ctx.validate_only: + shot = _save_screenshot(page, ctx.post.slug, "foojay-editor") + return {"validated": True, "screenshot": str(shot), "nonce_acquired": True} + + cookies = page.context.cookies("https://foojay.io/") + cookie_header = "; ".join(f"{c['name']}={c['value']}" for c in cookies) + + # Yoast canonical (_yoast_wpseo_canonical) is not registered for REST + # writes on foojay's Yoast install — POSTing it via meta is silently + # ignored. Surface the canonical as a visible note at the top of the + # body instead, so the editor reviewer can wire it into Yoast's UI + # field before publishing. 
+ canonical_prefix = ( + f"\n\n" + f"*Originally published on the [Codename One blog]({ctx.post.canonical_url}).*\n\n" + ) + + payload: dict[str, Any] = { + "title": ctx.post.title, + "content": canonical_prefix + ctx.body_markdown, + "status": "draft", + } + excerpt = str(ctx.post.front_matter.get("description") or "").strip() + if excerpt: + payload["excerpt"] = excerpt[:500] + + import urllib.error as _ue + import urllib.request as _ur + request = _ur.Request( + self.REST_POSTS_ENDPOINT, + data=json.dumps(payload).encode("utf-8"), + method="POST", + ) + request.add_header("Content-Type", "application/json") + request.add_header("Accept", "application/json") + request.add_header("X-WP-Nonce", nonce) + request.add_header("Cookie", cookie_header) + request.add_header( + "User-Agent", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_0) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + ) + try: + with _ur.urlopen(request, timeout=60) as response: + body = response.read().decode("utf-8") + except _ue.HTTPError as err: + detail = err.read().decode("utf-8", errors="replace") + raise AdapterError(f"REST POST failed HTTP {err.code}: {detail}") from err + + data = json.loads(body) if body else {} + post_id = data.get("id") + if not post_id: + raise AdapterError(f"REST response missing post id: {data}") + return { + "id": post_id, + "url": f"https://foojay.io/wp-admin/post.php?post={post_id}&action=edit", + "preview_url": data.get("link"), + "syndicated_at": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"), + } + + +class HackerNoonAdapter: + """HackerNoon — app.hackernoon.com email/password login + their own editor. + + Selectors below have NOT been validated against the live site. Run with + ``--validate-only --headed`` first and update them if the run fails. 
+ """ + + name = "hackernoon" + LOGIN_URL = "https://app.hackernoon.com/sign-in" + EDITOR_URL = "https://app.hackernoon.com/new-story" + + USER_SELECTORS = [ + "input[type='email']", + "input[name='email']", + "input[placeholder*='mail' i]", + ] + PASSWORD_SELECTORS = [ + "input[type='password']", + "input[name='password']", + ] + SUBMIT_SELECTORS = [ + "button[type='submit']", + "button:has-text('Sign in')", + "button:has-text('Log in')", + ] + TITLE_SELECTORS = [ + "[data-placeholder='Title']", + "[contenteditable='true'][placeholder*='Title']", + "textarea[placeholder*='Title']", + ] + BODY_SELECTORS = [ + "[data-placeholder*='your story' i]", + "[contenteditable='true'][placeholder*='Tell your story' i]", + ] + SAVE_DRAFT_SELECTORS = [ + "button:has-text('Save Draft')", + "button:has-text('Save as draft')", + ] + CANONICAL_FIELD_SELECTORS = [ + "input[name='canonical']", + "input[placeholder*='canonical' i]", + "input[placeholder*='original URL' i]", + ] + + @staticmethod + def is_configured() -> bool: + return bool(os.environ.get("HACKERNOON_USER") and os.environ.get("HACKERNOON_PASSWORD")) + + def login(self, page) -> None: + page.goto(self.LOGIN_URL, wait_until="domcontentloaded") + _find_first(page, self.USER_SELECTORS).fill(os.environ["HACKERNOON_USER"]) + _find_first(page, self.PASSWORD_SELECTORS).fill(os.environ["HACKERNOON_PASSWORD"]) + _find_first(page, self.SUBMIT_SELECTORS).click() + page.wait_for_url("**/dashboard*", timeout=25000) + + def submit_draft(self, page, ctx: AdapterContext) -> dict[str, Any]: + page.goto(self.EDITOR_URL, wait_until="domcontentloaded") + _find_first(page, self.TITLE_SELECTORS).fill(ctx.post.title) + + body_field = _find_first(page, self.BODY_SELECTORS) + body_field.click() + page.evaluate("text => navigator.clipboard.writeText(text)", ctx.body_markdown) + page.keyboard.press("Meta+V" if sys.platform == "darwin" else "Control+V") + + # Canonical field lives behind a settings panel; selectors here will + # almost certainly 
need adjustment. Don't fail the run if it's hidden. + try: + field = _find_first(page, self.CANONICAL_FIELD_SELECTORS, timeout=3000) + field.fill(ctx.post.canonical_url) + except AdapterError: + pass + + if ctx.validate_only: + shot = _save_screenshot(page, ctx.post.slug, "hackernoon-editor") + return {"validated": True, "screenshot": str(shot)} + + _find_first(page, self.SAVE_DRAFT_SELECTORS).click() + # Drafts land at /draft/ on HackerNoon + page.wait_for_url("**/draft/**", timeout=20000) + return { + "url": page.url, + "syndicated_at": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"), + } + + +class DZoneAdapter: + """DZone — email/password login, TinyMCE-based article editor. + + Selectors below have NOT been validated against the live site. The body + field is inside a TinyMCE iframe; the adapter pastes the markdown as + plain text and the editor will need a manual cleanup pass before publish. + """ + + name = "dzone" + LOGIN_URL = "https://dzone.com/users/login.html" + EDITOR_URL = "https://dzone.com/articles/new" + + USER_SELECTORS = ["input[name='username']", "input[type='email']"] + PASSWORD_SELECTORS = ["input[name='password']", "input[type='password']"] + SUBMIT_SELECTORS = ["button[type='submit']", "button:has-text('Log in')"] + TITLE_SELECTORS = ["input[name='title']", "input#title"] + BODY_IFRAME_SELECTORS = ["iframe.tox-edit-area__iframe", "iframe[id$='_ifr']"] + CANONICAL_SELECTORS = [ + "input[name='originalUrl']", + "input[name='canonicalUrl']", + "input[placeholder*='canonical' i]", + "input[placeholder*='original' i]", + ] + SAVE_DRAFT_SELECTORS = [ + "button:has-text('Save Draft')", + "button:has-text('Save as draft')", + "button[name='saveDraft']", + ] + + @staticmethod + def is_configured() -> bool: + return bool(os.environ.get("DZONE_USER") and os.environ.get("DZONE_PASSWORD")) + + def login(self, page) -> None: + page.goto(self.LOGIN_URL, wait_until="domcontentloaded") + _find_first(page, 
self.USER_SELECTORS).fill(os.environ["DZONE_USER"]) + _find_first(page, self.PASSWORD_SELECTORS).fill(os.environ["DZONE_PASSWORD"]) + _find_first(page, self.SUBMIT_SELECTORS).click() + page.wait_for_load_state("networkidle", timeout=20000) + + def submit_draft(self, page, ctx: AdapterContext) -> dict[str, Any]: + page.goto(self.EDITOR_URL, wait_until="domcontentloaded") + _find_first(page, self.TITLE_SELECTORS).fill(ctx.post.title) + + # TinyMCE body lives inside an iframe. + iframe_handle = _find_first(page, self.BODY_IFRAME_SELECTORS) + frame = iframe_handle.content_frame() + if frame is None: + raise AdapterError("could not access TinyMCE iframe content frame") + frame.locator("body").click() + # Paste raw markdown as text — DZone's editor accepts it but renders + # it as plain text. Editor reviewer will tidy before publishing. + page.evaluate("text => navigator.clipboard.writeText(text)", ctx.body_markdown) + frame.locator("body").press("Meta+V" if sys.platform == "darwin" else "Control+V") + + try: + field = _find_first(page, self.CANONICAL_SELECTORS, timeout=3000) + field.fill(ctx.post.canonical_url) + except AdapterError: + pass + + if ctx.validate_only: + shot = _save_screenshot(page, ctx.post.slug, "dzone-editor") + return {"validated": True, "screenshot": str(shot)} + + _find_first(page, self.SAVE_DRAFT_SELECTORS).click() + page.wait_for_load_state("networkidle", timeout=20000) + return { + "url": page.url, + "syndicated_at": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"), + } + + +class MediumAdapter: + """Medium — no password login flow, so this adapter relies on a saved + Playwright storageState (cookies + localStorage) loaded from the + ``MEDIUM_STORAGE_STATE`` env var (base64-encoded JSON). 
+ + To produce one: + + $ python3 -c "from playwright.sync_api import sync_playwright as p; \\ + ctx=p().start().chromium.launch(headless=False).new_context(); \\ + page=ctx.new_page(); page.goto('https://medium.com/m/signin'); \\ + input('Log in then press Enter...'); \\ + ctx.storage_state(path='medium-state.json')" + $ base64 -i medium-state.json | pbcopy # paste as MEDIUM_STORAGE_STATE + """ + + name = "medium" + EDITOR_URL = "https://medium.com/new-story" + + TITLE_SELECTORS = [ + "h3[data-default-value*='Title' i]", + "[data-default-value*='Title' i]", + "h3.graf--title", + ] + BODY_SELECTORS = [ + "[data-default-value*='your story' i]", + "p[data-default-value*='Tell your story' i]", + ".section-content [contenteditable='true']", + ] + SETTINGS_BUTTON_SELECTORS = [ + "button:has-text('Story settings')", + "button[aria-label*='settings' i]", + ] + CANONICAL_FIELD_SELECTORS = [ + "input[placeholder*='canonical' i]", + "input[placeholder*='URL of original' i]", + ] + SAVE_AS_DRAFT_SELECTORS = [ + # Medium auto-saves drafts; an explicit Save Draft action isn't + # always necessary, but the keyboard shortcut Cmd/Ctrl+S works. + ] + + @staticmethod + def is_configured() -> bool: + return bool(os.environ.get("MEDIUM_STORAGE_STATE")) + + @staticmethod + def storage_state_path() -> Path: + encoded = os.environ["MEDIUM_STORAGE_STATE"] + decoded = base64.b64decode(encoded) + path = Path("/tmp/medium-storage-state.json") + path.write_bytes(decoded) + return path + + def login(self, page) -> None: + # No-op: storage state was loaded into the browser context already. 
+ return + + def submit_draft(self, page, ctx: AdapterContext) -> dict[str, Any]: + page.goto(self.EDITOR_URL, wait_until="domcontentloaded") + _find_first(page, self.TITLE_SELECTORS).fill(ctx.post.title) + body_field = _find_first(page, self.BODY_SELECTORS) + body_field.click() + page.evaluate("text => navigator.clipboard.writeText(text)", ctx.body_markdown) + page.keyboard.press("Meta+V" if sys.platform == "darwin" else "Control+V") + + # Set canonical via Story settings panel. + try: + _find_first(page, self.SETTINGS_BUTTON_SELECTORS, timeout=3000).click() + field = _find_first(page, self.CANONICAL_FIELD_SELECTORS, timeout=5000) + field.fill(ctx.post.canonical_url) + # Close the settings panel + page.keyboard.press("Escape") + except AdapterError: + pass + + if ctx.validate_only: + shot = _save_screenshot(page, ctx.post.slug, "medium-editor") + return {"validated": True, "screenshot": str(shot)} + + # Force-save the draft via keyboard shortcut, then capture the URL. + page.keyboard.press("Meta+S" if sys.platform == "darwin" else "Control+S") + page.wait_for_timeout(2000) + return { + "url": page.url, + "syndicated_at": dt.datetime.now(dt.timezone.utc).isoformat(timespec="seconds"), + } + + +ADAPTERS: dict[str, Callable[[], Any]] = { + "foojay": FoojayAdapter, + "hackernoon": HackerNoonAdapter, + "dzone": DZoneAdapter, + "medium": MediumAdapter, +} + + +# --------------------------------------------------------------------------- # +# Driver # +# --------------------------------------------------------------------------- # + + +def run_adapter(adapter, post: Post, body_markdown: str, headed: bool, validate_only: bool) -> dict[str, Any]: + from playwright.sync_api import sync_playwright + + with sync_playwright() as pw: + launch_kwargs: dict[str, Any] = {"headless": not headed} + browser = pw.chromium.launch(**launch_kwargs) + context_kwargs: dict[str, Any] = { + "viewport": {"width": 1400, "height": 900}, + "user_agent": ( + "Mozilla/5.0 (Macintosh; Intel Mac OS 
X 13_0) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" + ), + } + if isinstance(adapter, MediumAdapter): + context_kwargs["storage_state"] = str(MediumAdapter.storage_state_path()) + + context = browser.new_context(**context_kwargs) + # Grant clipboard access so navigator.clipboard.writeText() succeeds. + try: + context.grant_permissions(["clipboard-read", "clipboard-write"]) + except Exception: # noqa: BLE001 — Firefox/WebKit don't support all permissions + pass + + page = context.new_page() + ctx = AdapterContext(post=post, body_markdown=body_markdown, headed=headed, validate_only=validate_only) + + try: + adapter.login(page) + result = adapter.submit_draft(page, ctx) + except Exception as err: # noqa: BLE001 + shot = _save_screenshot(page, post.slug, f"{adapter.name}-error") + raise AdapterError(f"{adapter.name} flow failed (screenshot: {shot}): {err}") from err + finally: + context.close() + browser.close() + return result + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--platforms", default=DEFAULT_PLATFORMS, + help=f"Comma-separated platforms (default: {DEFAULT_PLATFORMS}).") + parser.add_argument("--dry-run", action="store_true", + help="No browser launched; just print what would happen.") + parser.add_argument("--headed", action="store_true", + help="Run with a visible browser (for local debugging).") + parser.add_argument("--validate-only", action="store_true", + help="Log in and open the editor, then screenshot and exit without submitting.") + parser.add_argument("--today", default=None, help="Override today's date (YYYY-MM-DD).") + parser.add_argument("--floor", default=ELIGIBILITY_FLOOR.isoformat(), + help=f"Posts must be dated strictly after this date (default: {ELIGIBILITY_FLOOR.isoformat()}).") + parser.add_argument("--min-age-days", type=int, default=MIN_AGE_DAYS, + help=f"Minimum post age in days (default: {MIN_AGE_DAYS}).") + 
parser.add_argument("--blog-dir", default=str(BLOG_DIR)) + parser.add_argument("--state-file", default=str(STATE_FILE)) + return parser.parse_args(argv) + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + today = dt.date.fromisoformat(args.today) if args.today else dt.date.today() + floor = dt.date.fromisoformat(args.floor) + requested = [p.strip() for p in args.platforms.split(",") if p.strip()] + blog_dir = Path(args.blog_dir) + state_file = Path(args.state_file) + + unknown = [p for p in requested if p not in ADAPTERS] + if unknown: + print(f"Unknown platform(s): {unknown}. Known: {sorted(ADAPTERS)}", file=sys.stderr) + return 1 + + adapters: list[Any] = [] + for name in requested: + adapter = ADAPTERS[name]() + if args.dry_run or args.validate_only or adapter.is_configured(): + adapters.append(adapter) + else: + print(f"[{name}] credentials not configured; skipping platform.") + + if not adapters: + print("No browser platforms are configured; nothing to do.") + return 0 + + posts = discover_posts(blog_dir) + state = State.load(state_file) + platform_names = [a.name for a in adapters] + candidate = select_candidate(posts, state, platform_names, today, floor, args.min_age_days) + if candidate is None and not args.validate_only: + print("No syndication candidate found today.") + return 0 + if candidate is None and args.validate_only: + # In validate-only mode, fall back to the newest post so we can still + # verify selectors even when nothing is technically due. 
+ candidate = posts[-1] + print(f"validate-only: using newest post {candidate.slug} for selector verification.") + + print(f"Selected post: {candidate.slug} (date={candidate.date.isoformat()})") + body_markdown = render_syndicated_body(candidate) + + any_change = False + failures: list[str] = [] + + for adapter in adapters: + if state.is_syndicated(candidate.slug, adapter.name) and not args.validate_only: + print(f" [{adapter.name}] already syndicated; skipping.") + continue + if args.dry_run: + print(f" [{adapter.name}] dry run — would publish {len(body_markdown)} chars, " + f"canonical {candidate.canonical_url}") + continue + try: + result = run_adapter(adapter, candidate, body_markdown, args.headed, args.validate_only) + except Exception as err: # noqa: BLE001 + print(f" [{adapter.name}] FAILED: {err}", file=sys.stderr) + failures.append(adapter.name) + continue + + if args.validate_only: + print(f" [{adapter.name}] validated. {json.dumps(result)}") + continue + + if not result.get("url"): + print(f" [{adapter.name}] response missing URL: {result}", file=sys.stderr) + failures.append(adapter.name) + continue + + state.record(candidate.slug, adapter.name, result) + any_change = True + print(f" [{adapter.name}] published draft: {result['url']}") + + if any_change: + state.save(state_file) + print(f"Updated state file: {state_file}") + + if failures: + return 2 + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) From 1af86e4a32ea6a6352dd357a243e5d6ab861f6b3 Mon Sep 17 00:00:00 2001 From: Shai Almog <67850168+shai-almog@users.noreply.github.com> Date: Wed, 6 May 2026 13:25:09 +0300 Subject: [PATCH 06/17] Add Medium storage-state export helper scripts/website/export_medium_storage.py captures a logged-in Medium session as a base64 blob suitable for the MEDIUM_STORAGE_STATE secret that the browser syndicator's MediumAdapter requires. 
Three modes: - --from-firefox-profile (no second login): reads cookies.sqlite from the user's existing Firefox profile and builds the storage state JSON directly. Auto-detects the most recently used profile under ~/Library/Application Support/Firefox/Profiles/. Refuses to write state if the profile is not actually logged in (uid cookie missing or prefixed with `lo_`). - --browser firefox|chrome|chromium|msedge: launches Playwright with the requested browser, opens medium.com/m/signin, and polls cookies every 3s until a non-`lo_` uid appears. 10-minute timeout default. - --interactive: same launch but waits on stdin instead of polling (useful when running attached to a real terminal). Output is written as JSON to --output and (unless --no-base64) printed as a base64 blob ready to paste as a repo secret. The local JSON file is gitignored. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 2 + scripts/website/export_medium_storage.py | 254 +++++++++++++++++++++++ 2 files changed, 256 insertions(+) create mode 100755 scripts/website/export_medium_storage.py diff --git a/.gitignore b/.gitignore index 82e5eba372..20d8fd12cd 100644 --- a/.gitignore +++ b/.gitignore @@ -30,6 +30,8 @@ **/dist/* *.zip CodenameOneDesigner/src/version.properties +medium-storage-state.json +medium-storage-state.*.json /Ports/iOSPort/build/ /Ports/iOSPort/dist/ Ports/iOSPort/nbproject/private/private.xml diff --git a/scripts/website/export_medium_storage.py b/scripts/website/export_medium_storage.py new file mode 100755 index 0000000000..b30c8f8cce --- /dev/null +++ b/scripts/website/export_medium_storage.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +"""Export a Medium login session as a base64 string for the MEDIUM_STORAGE_STATE secret. + +Medium has no password-based REST API and no password-based login form — +only OAuth (Google/Apple/Facebook) and email magic-link. 
The browser-based +syndicator therefore needs a saved Playwright storageState (cookies + +localStorage) instead of credentials. + +Usage: + + python3 scripts/website/export_medium_storage.py + # opens a headed browser; you log in; press Enter when done + # script writes ./medium-storage-state.json AND prints the base64 + # representation that you paste as the MEDIUM_STORAGE_STATE secret + +If the user has Google Chrome installed, the script uses Chrome (channel= +"chrome") so the browser the user sees is the same brand they are used to. +Otherwise it falls back to Playwright's bundled Chromium. +""" + +from __future__ import annotations + +import argparse +import base64 +import glob +import json +import shutil +import sqlite3 +import sys +import tempfile +import time +from pathlib import Path + + +DEFAULT_OUTPUT = Path("medium-storage-state.json") +SIGNIN_URL = "https://medium.com/m/signin" +# Medium assigns every visitor a `uid` cookie. Anonymous visitors get a value +# prefixed with `lo_` ("logged-out"); a real signed-in user gets a value +# without that prefix. We use that distinction to detect login completion. 
+ANON_UID_PREFIX = "lo_" +DEFAULT_TIMEOUT_SECONDS = 600 # 10 minutes for the user to complete login + + +def _is_logged_in(cookies: list[dict]) -> bool: + for c in cookies: + if c.get("name") == "uid": + value = c.get("value") or "" + if value and not value.startswith(ANON_UID_PREFIX): + return True + return False + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--output", default=str(DEFAULT_OUTPUT), + help=f"Path to write the storage state JSON (default: {DEFAULT_OUTPUT})") + parser.add_argument("--no-base64", action="store_true", + help="Skip printing the base64 blob (just write the JSON file).") + parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT_SECONDS, + help="Maximum seconds to wait for login completion (default: 600).") + parser.add_argument("--interactive", action="store_true", + help="Wait for Enter on stdin instead of polling for auth cookies.") + parser.add_argument("--browser", default="chrome", choices=["chrome", "chromium", "firefox", "msedge"], + help="Which Playwright browser to launch (default: chrome).") + parser.add_argument("--from-firefox-profile", nargs="?", const="auto", default=None, + help=("Skip launching a browser and instead read medium.com cookies from an " + "existing Firefox profile's cookies.sqlite. Pass a path or omit for auto-detect.")) + return parser.parse_args(argv) + + +def _locate_firefox_profile(explicit: str | None) -> Path: + if explicit and explicit != "auto": + path = Path(explicit).expanduser() + if path.is_file(): + return path + if path.is_dir(): + candidate = path / "cookies.sqlite" + if candidate.is_file(): + return candidate + raise RuntimeError(f"Firefox cookies.sqlite not found at {path}") + # Auto-detect macOS Firefox profile. + base = Path.home() / "Library" / "Application Support" / "Firefox" / "Profiles" + if not base.exists(): + # Linux / other-OS fallbacks. 
+ for guess in (Path.home() / ".mozilla" / "firefox", Path.home() / "snap" / "firefox" / "common" / ".mozilla" / "firefox"): + if guess.exists(): + base = guess + break + if not base.exists(): + raise RuntimeError("Could not locate a Firefox profiles directory.") + candidates = sorted(glob.glob(str(base / "*default*" / "cookies.sqlite"))) or sorted( + glob.glob(str(base / "*" / "cookies.sqlite")) + ) + if not candidates: + raise RuntimeError(f"No cookies.sqlite found under {base}") + # Prefer the most recently modified profile. + return Path(max(candidates, key=lambda p: Path(p).stat().st_mtime)) + + +def _firefox_storage_state(cookies_db: Path) -> dict: + # Copy to a temp file because Firefox holds a write lock on the live DB. + with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as tmp: + tmp_path = Path(tmp.name) + shutil.copy2(cookies_db, tmp_path) + try: + conn = sqlite3.connect(f"file:{tmp_path}?mode=ro", uri=True) + cur = conn.execute( + "SELECT name, value, host, path, expiry, isSecure, isHttpOnly, sameSite " + "FROM moz_cookies WHERE host LIKE '%medium.com'" + ) + rows = cur.fetchall() + conn.close() + finally: + tmp_path.unlink(missing_ok=True) + samesite_map = {0: "None", 1: "Lax", 2: "Strict"} + cookies = [] + for name, value, host, path, expiry, is_secure, is_http_only, same_site in rows: + cookies.append({ + "name": name, + "value": value, + "domain": host if host.startswith(".") else "." 
+ host, + "path": path or "/", + "expires": float(expiry) if expiry else -1.0, + "httpOnly": bool(is_http_only), + "secure": bool(is_secure), + "sameSite": samesite_map.get(int(same_site or 0), "None"), + }) + return {"cookies": cookies, "origins": []} + + +def main(argv: list[str]) -> int: + args = parse_args(argv) + output_path = Path(args.output).resolve() + + if args.from_firefox_profile is not None: + try: + cookies_db = _locate_firefox_profile(args.from_firefox_profile) + except RuntimeError as err: + print(f"Error: {err}", file=sys.stderr) + return 1 + print(f"Reading Medium cookies from Firefox profile: {cookies_db}") + state = _firefox_storage_state(cookies_db) + if not _is_logged_in(state["cookies"]): + print("Error: this Firefox profile does not appear to be logged in to Medium " + "(no `uid` cookie without the `lo_` prefix).", file=sys.stderr) + return 1 + output_path.write_text(json.dumps(state), encoding="utf-8") + print(f"Wrote storage state: {output_path}") + print(f" cookies captured: {len(state['cookies'])}") + if not args.no_base64: + encoded = base64.b64encode(output_path.read_bytes()).decode("ascii") + print() + print("Paste the following as the MEDIUM_STORAGE_STATE repository secret:") + print("-" * 72) + print(encoded) + print("-" * 72) + return 0 + + try: + from playwright.sync_api import sync_playwright + except ImportError: + print("Playwright is not installed. In a venv, run: pip install playwright && playwright install chromium", + file=sys.stderr) + return 1 + + with sync_playwright() as pw: + launch_kwargs: dict = {"headless": False} + # The args namespace renamed channel to browser to allow Firefox. + browser_choice = args.browser + if browser_choice == "firefox": + try: + browser = pw.firefox.launch(headless=False) + except Exception as err: # noqa: BLE001 + print(f"Could not launch Playwright Firefox ({err}). 
" + "Run `playwright install firefox` and retry.", file=sys.stderr) + return 1 + else: + if browser_choice and browser_choice != "chromium": + launch_kwargs["channel"] = browser_choice + try: + browser = pw.chromium.launch(**launch_kwargs) + except Exception as err: # noqa: BLE001 — channel may not be installed + print(f"Could not launch with browser='{browser_choice}' ({err}); falling back to bundled Chromium.", + file=sys.stderr) + browser = pw.chromium.launch(headless=False) + + context = browser.new_context( + viewport={"width": 1280, "height": 900}, + user_agent=( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_0) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" + ), + ) + page = context.new_page() + page.goto(SIGNIN_URL) + + print() + print("=" * 72) + print("A browser window has opened on Medium's sign-in page.") + print("Log in (Google / email / whatever you normally use).") + if args.interactive: + print("When you can see your Medium home or profile, return here and press Enter.") + else: + print(f"The script will detect login automatically (waiting up to {args.timeout}s).") + print("=" * 72) + sys.stdout.flush() + + if args.interactive: + try: + input("Press Enter once you are logged in… ") + except (KeyboardInterrupt, EOFError): + print("Aborted.", file=sys.stderr) + browser.close() + return 1 + else: + deadline = time.time() + args.timeout + detected = False + while time.time() < deadline: + if _is_logged_in(context.cookies("https://medium.com/")): + detected = True + break + time.sleep(3) + if not detected: + print("Timed out waiting for Medium login — no `uid` cookie without the `lo_` prefix.", + file=sys.stderr) + browser.close() + return 1 + print("Logged-in `uid` cookie detected — capturing session state…") + # Give Medium a couple seconds to finish setting localStorage. 
+ time.sleep(3) + + state = context.storage_state() + output_path.write_text(json.dumps(state), encoding="utf-8") + browser.close() + + print() + print(f"Wrote storage state: {output_path}") + print(f" cookies captured: {len(state.get('cookies', []))}") + print(f" origins with localStorage: {len(state.get('origins', []))}") + + if args.no_base64: + return 0 + + encoded = base64.b64encode(output_path.read_bytes()).decode("ascii") + print() + print("Paste the following as the MEDIUM_STORAGE_STATE repository secret:") + print("-" * 72) + print(encoded) + print("-" * 72) + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) From 7f20cef1bf888499d2a5a77fa492c5fa5e8bb254 Mon Sep 17 00:00:00 2001 From: Shai Almog <67850168+shai-almog@users.noreply.github.com> Date: Wed, 6 May 2026 13:40:01 +0300 Subject: [PATCH 07/17] DZone via stored session; HackerNoon React-typing fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two structural changes to the browser syndicator: 1. DZone — switch from password-based login to a saved Playwright storageState loaded from a DZONE_STORAGE_STATE secret. DZone's AngularJS doLogin() requires a reCAPTCHA token (visible in scope.credentials.recaptchaToken) that Google's invisible reCAPTCHA does not issue to headless browsers; the auth request is never sent. Same approach as the existing MediumAdapter. 2. HackerNoon — replace .fill() with .press_sequentially() because the login inputs are React-controlled. .fill() set DOM .value but never updated React's internal state, so doLogin() ran with empty fields. With per-character typing the form actually submits; HackerNoon's "Invalid email or password" message now surfaces (instead of a silent no-op) when credentials don't match. Also fail-fast on a stuck-on-/login URL with the explicit error text. 
Helper script renamed export_medium_storage.py -> export_storage_state.py and generalized to support multiple sites via --site {medium,dzone}, with per-site cookie host filter and login detector. Browser-launch path picks Playwright Firefox when --browser firefox is requested. Workflow updated for the new DZONE_STORAGE_STATE secret name; gitignore generalized to exclude all *-storage-state.json scratch files. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../website/{export_medium_storage.py => export_storage_state.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/website/{export_medium_storage.py => export_storage_state.py} (100%) diff --git a/scripts/website/export_medium_storage.py b/scripts/website/export_storage_state.py similarity index 100% rename from scripts/website/export_medium_storage.py rename to scripts/website/export_storage_state.py From 8dfa6098dca815497a16ee8da70504fc60811893 Mon Sep 17 00:00:00 2001 From: Shai Almog <67850168+shai-almog@users.noreply.github.com> Date: Wed, 6 May 2026 13:40:25 +0300 Subject: [PATCH 08/17] Wire up DZone storage-state path (content of prior commit) Previous commit only captured the file rename; this one carries the actual code changes for DZone (storage-state auth via DZONE_STORAGE_STATE), HackerNoon (React-friendly press_sequentially typing + fail-fast on stuck-on-/login), the multi-site export helper (--site, profiles for medium and dzone, Firefox cookie host filter), the workflow secret rename (DZONE_USER/PASSWORD -> DZONE_STORAGE_STATE), and the broader *-storage-state.json gitignore. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/blog-syndication.yml | 7 +- .gitignore | 4 +- scripts/website/export_storage_state.py | 107 ++++++++++++--------- scripts/website/syndicate_browser_posts.py | 96 +++++++++++------- 4 files changed, 130 insertions(+), 84 deletions(-) diff --git a/.github/workflows/blog-syndication.yml b/.github/workflows/blog-syndication.yml index 3a22d8bb87..e06e32c61f 100644 --- a/.github/workflows/blog-syndication.yml +++ b/.github/workflows/blog-syndication.yml @@ -52,10 +52,10 @@ jobs: env: FOOJAY_USER: ${{ secrets.FOOJAY_USER }} HACKERNOON_USER: ${{ secrets.HACKERNOON_USER }} - DZONE_USER: ${{ secrets.DZONE_USER }} + DZONE_STORAGE_STATE: ${{ secrets.DZONE_STORAGE_STATE }} MEDIUM_STORAGE_STATE: ${{ secrets.MEDIUM_STORAGE_STATE }} run: | - if [ -n "${FOOJAY_USER}" ] || [ -n "${HACKERNOON_USER}" ] || [ -n "${DZONE_USER}" ] || [ -n "${MEDIUM_STORAGE_STATE}" ]; then + if [ -n "${FOOJAY_USER}" ] || [ -n "${HACKERNOON_USER}" ] || [ -n "${DZONE_STORAGE_STATE}" ] || [ -n "${MEDIUM_STORAGE_STATE}" ]; then echo "any_configured=true" >> "${GITHUB_OUTPUT}" else echo "any_configured=false" >> "${GITHUB_OUTPUT}" @@ -75,8 +75,7 @@ jobs: FOOJAY_PASSWORD: ${{ secrets.FOOJAY_PASSWORD }} HACKERNOON_USER: ${{ secrets.HACKERNOON_USER }} HACKERNOON_PASSWORD: ${{ secrets.HACKERNOON_PASSWORD }} - DZONE_USER: ${{ secrets.DZONE_USER }} - DZONE_PASSWORD: ${{ secrets.DZONE_PASSWORD }} + DZONE_STORAGE_STATE: ${{ secrets.DZONE_STORAGE_STATE }} MEDIUM_STORAGE_STATE: ${{ secrets.MEDIUM_STORAGE_STATE }} run: | set -euo pipefail diff --git a/.gitignore b/.gitignore index 20d8fd12cd..89ef591e4d 100644 --- a/.gitignore +++ b/.gitignore @@ -30,8 +30,8 @@ **/dist/* *.zip CodenameOneDesigner/src/version.properties -medium-storage-state.json -medium-storage-state.*.json +*-storage-state.json +*-storage-state.*.json /Ports/iOSPort/build/ /Ports/iOSPort/dist/ Ports/iOSPort/nbproject/private/private.xml diff --git 
a/scripts/website/export_storage_state.py b/scripts/website/export_storage_state.py index b30c8f8cce..bdb1034244 100755 --- a/scripts/website/export_storage_state.py +++ b/scripts/website/export_storage_state.py @@ -1,21 +1,23 @@ #!/usr/bin/env python3 -"""Export a Medium login session as a base64 string for the MEDIUM_STORAGE_STATE secret. +"""Export a logged-in browser session for syndication targets that block +password-based automation (Medium has no password login at all; DZone +gates its login form behind invisible reCAPTCHA). -Medium has no password-based REST API and no password-based login form — -only OAuth (Google/Apple/Facebook) and email magic-link. The browser-based -syndicator therefore needs a saved Playwright storageState (cookies + -localStorage) instead of credentials. +Two paths: -Usage: + --from-firefox-profile read cookies straight from your existing Firefox + profile's cookies.sqlite (no second login) + --browser {chrome,...} launch Playwright with the chosen browser, open + the site's signin page, poll for auth cookies - python3 scripts/website/export_medium_storage.py - # opens a headed browser; you log in; press Enter when done - # script writes ./medium-storage-state.json AND prints the base64 - # representation that you paste as the MEDIUM_STORAGE_STATE secret +Output is a Playwright storageState JSON written to disk and (unless +--no-base64) a base64 blob ready to paste as the {SITE}_STORAGE_STATE +repo secret consumed by syndicate_browser_posts.py. -If the user has Google Chrome installed, the script uses Chrome (channel= -"chrome") so the browser the user sees is the same brand they are used to. -Otherwise it falls back to Playwright's bundled Chromium. 
+Examples: + + python3 scripts/website/export_storage_state.py --site medium --from-firefox-profile + python3 scripts/website/export_storage_state.py --site dzone --browser firefox """ from __future__ import annotations @@ -33,27 +35,41 @@ DEFAULT_OUTPUT = Path("medium-storage-state.json") -SIGNIN_URL = "https://medium.com/m/signin" -# Medium assigns every visitor a `uid` cookie. Anonymous visitors get a value -# prefixed with `lo_` ("logged-out"); a real signed-in user gets a value -# without that prefix. We use that distinction to detect login completion. -ANON_UID_PREFIX = "lo_" DEFAULT_TIMEOUT_SECONDS = 600 # 10 minutes for the user to complete login - -def _is_logged_in(cookies: list[dict]) -> bool: - for c in cookies: - if c.get("name") == "uid": - value = c.get("value") or "" - if value and not value.startswith(ANON_UID_PREFIX): - return True - return False +# Per-target site profile. Each entry knows where to land in a launched browser, +# which cookie domain to filter from a Firefox profile, and how to recognize +# a logged-in session (a function over the captured cookie list). +SITE_PROFILES: dict[str, dict] = { + "medium": { + "signin_url": "https://medium.com/m/signin", + "cookie_host_glob": "%medium.com", + # Medium assigns every visitor a `uid` cookie. Anonymous visitors get a + # value prefixed with `lo_`; a signed-in user gets one without it. + "is_logged_in": lambda cookies: any( + c.get("name") == "uid" and not (c.get("value") or "").startswith("lo_") + for c in cookies + ), + }, + "dzone": { + "signin_url": "https://dzone.com/users/login.html", + "cookie_host_glob": "%dzone.com", + # DZone uses JSESSIONID for the auth session; a signed-in session also + # has the SPRING_SECURITY_REMEMBER_ME_COOKIE on long-lived logins. 
+ "is_logged_in": lambda cookies: any( + c.get("name") in ("JSESSIONID", "SPRING_SECURITY_REMEMBER_ME_COOKIE") + for c in cookies + ), + }, +} def parse_args(argv: list[str]) -> argparse.Namespace: parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--output", default=str(DEFAULT_OUTPUT), - help=f"Path to write the storage state JSON (default: {DEFAULT_OUTPUT})") + parser.add_argument("--site", choices=sorted(SITE_PROFILES), default="medium", + help="Which target site to capture a session for (default: medium).") + parser.add_argument("--output", default=None, + help="Path to write the storage state JSON (default: -storage-state.json)") parser.add_argument("--no-base64", action="store_true", help="Skip printing the base64 blob (just write the JSON file).") parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT_SECONDS, @@ -97,7 +113,7 @@ def _locate_firefox_profile(explicit: str | None) -> Path: return Path(max(candidates, key=lambda p: Path(p).stat().st_mtime)) -def _firefox_storage_state(cookies_db: Path) -> dict: +def _firefox_storage_state(cookies_db: Path, host_glob: str) -> dict: # Copy to a temp file because Firefox holds a write lock on the live DB. 
with tempfile.NamedTemporaryFile(suffix=".sqlite", delete=False) as tmp: tmp_path = Path(tmp.name) @@ -106,7 +122,8 @@ def _firefox_storage_state(cookies_db: Path) -> dict: conn = sqlite3.connect(f"file:{tmp_path}?mode=ro", uri=True) cur = conn.execute( "SELECT name, value, host, path, expiry, isSecure, isHttpOnly, sameSite " - "FROM moz_cookies WHERE host LIKE '%medium.com'" + "FROM moz_cookies WHERE host LIKE ?", + (host_glob,), ) rows = cur.fetchall() conn.close() @@ -130,7 +147,9 @@ def _firefox_storage_state(cookies_db: Path) -> dict: def main(argv: list[str]) -> int: args = parse_args(argv) - output_path = Path(args.output).resolve() + profile = SITE_PROFILES[args.site] + output_path = Path(args.output or f"{args.site}-storage-state.json").resolve() + secret_name = f"{args.site.upper()}_STORAGE_STATE" if args.from_firefox_profile is not None: try: @@ -138,11 +157,11 @@ def main(argv: list[str]) -> int: except RuntimeError as err: print(f"Error: {err}", file=sys.stderr) return 1 - print(f"Reading Medium cookies from Firefox profile: {cookies_db}") - state = _firefox_storage_state(cookies_db) - if not _is_logged_in(state["cookies"]): - print("Error: this Firefox profile does not appear to be logged in to Medium " - "(no `uid` cookie without the `lo_` prefix).", file=sys.stderr) + print(f"Reading {args.site} cookies from Firefox profile: {cookies_db}") + state = _firefox_storage_state(cookies_db, profile["cookie_host_glob"]) + if not profile["is_logged_in"](state["cookies"]): + print(f"Error: this Firefox profile does not appear to be logged in to {args.site}.", + file=sys.stderr) return 1 output_path.write_text(json.dumps(state), encoding="utf-8") print(f"Wrote storage state: {output_path}") @@ -150,7 +169,7 @@ def main(argv: list[str]) -> int: if not args.no_base64: encoded = base64.b64encode(output_path.read_bytes()).decode("ascii") print() - print("Paste the following as the MEDIUM_STORAGE_STATE repository secret:") + print(f"Paste the following as the 
{secret_name} repository secret:") print("-" * 72) print(encoded) print("-" * 72) @@ -192,11 +211,11 @@ def main(argv: list[str]) -> int: ), ) page = context.new_page() - page.goto(SIGNIN_URL) + page.goto(profile["signin_url"]) print() print("=" * 72) - print("A browser window has opened on Medium's sign-in page.") + print(f"A browser window has opened on {args.site}'s sign-in page.") print("Log in (Google / email / whatever you normally use).") if args.interactive: print("When you can see your Medium home or profile, return here and press Enter.") @@ -216,17 +235,17 @@ def main(argv: list[str]) -> int: deadline = time.time() + args.timeout detected = False while time.time() < deadline: - if _is_logged_in(context.cookies("https://medium.com/")): + if profile["is_logged_in"](context.cookies(profile["signin_url"])): detected = True break time.sleep(3) if not detected: - print("Timed out waiting for Medium login — no `uid` cookie without the `lo_` prefix.", + print(f"Timed out waiting for {args.site} login — auth cookies not detected.", file=sys.stderr) browser.close() return 1 - print("Logged-in `uid` cookie detected — capturing session state…") - # Give Medium a couple seconds to finish setting localStorage. + print("Logged-in cookies detected — capturing session state…") + # Give the site a couple seconds to finish setting localStorage. 
time.sleep(3) state = context.storage_state() @@ -243,7 +262,7 @@ def main(argv: list[str]) -> int: encoded = base64.b64encode(output_path.read_bytes()).decode("ascii") print() - print("Paste the following as the MEDIUM_STORAGE_STATE repository secret:") + print(f"Paste the following as the {secret_name} repository secret:") print("-" * 72) print(encoded) print("-" * 72) diff --git a/scripts/website/syndicate_browser_posts.py b/scripts/website/syndicate_browser_posts.py index ae0fe6bd84..2d3379255d 100755 --- a/scripts/website/syndicate_browser_posts.py +++ b/scripts/website/syndicate_browser_posts.py @@ -98,6 +98,15 @@ def _find_first(page, selectors: list[str], *, timeout: int = 15000): raise AdapterError(f"none of the selectors matched: {selectors}: {last_error}") +def _load_base64_storage_state(env_var: str) -> Path: + """Decode a base64-encoded storage_state JSON from an env var to a temp file.""" + encoded = os.environ[env_var] + decoded = base64.b64decode(encoded) + path = Path(f"/tmp/{env_var.lower()}.json") + path.write_bytes(decoded) + return path + + def _save_screenshot(page, slug: str, label: str) -> Path: SCREENSHOT_DIR.mkdir(parents=True, exist_ok=True) stamp = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%dT%H%M%SZ") @@ -225,22 +234,15 @@ class HackerNoonAdapter: """ name = "hackernoon" - LOGIN_URL = "https://app.hackernoon.com/sign-in" + LOGIN_URL = "https://hackernoon.com/login" EDITOR_URL = "https://app.hackernoon.com/new-story" - USER_SELECTORS = [ - "input[type='email']", - "input[name='email']", - "input[placeholder*='mail' i]", - ] - PASSWORD_SELECTORS = [ - "input[type='password']", - "input[name='password']", - ] + USER_SELECTORS = ["input#email"] + PASSWORD_SELECTORS = ["input#password"] SUBMIT_SELECTORS = [ - "button[type='submit']", - "button:has-text('Sign in')", + "button:has-text('LOG IN')", "button:has-text('Log in')", + "button:has-text('Login')", ] TITLE_SELECTORS = [ "[data-placeholder='Title']", @@ -267,10 +269,32 @@ def 
is_configured() -> bool: def login(self, page) -> None: page.goto(self.LOGIN_URL, wait_until="domcontentloaded") - _find_first(page, self.USER_SELECTORS).fill(os.environ["HACKERNOON_USER"]) - _find_first(page, self.PASSWORD_SELECTORS).fill(os.environ["HACKERNOON_PASSWORD"]) + # Dismiss the Iubenda cookie consent banner if it overlays the page. + try: + page.click(".iubenda-cs-accept-btn, .iubenda-cs-reject-btn", timeout=3000) + except Exception: # noqa: BLE001 + pass + # The form is React-controlled; .fill() sets DOM .value but doesn't + # update React's internal state, so doLogin() runs with empty fields. + # Type per-character to dispatch the events React's onChange listens for. + email = _find_first(page, self.USER_SELECTORS) + email.click() + email.press_sequentially(os.environ["HACKERNOON_USER"], delay=10) + pwd = _find_first(page, self.PASSWORD_SELECTORS) + pwd.click() + pwd.press_sequentially(os.environ["HACKERNOON_PASSWORD"], delay=10) _find_first(page, self.SUBMIT_SELECTORS).click() - page.wait_for_url("**/dashboard*", timeout=25000) + page.wait_for_load_state("networkidle", timeout=30000) + # HackerNoon stays on /login if credentials were rejected; raise so the + # caller surfaces the explicit "Invalid email or password" rather than + # timing out later in the editor flow. + if page.url.rstrip("/").endswith("/login"): + err = page.evaluate( + "() => { const t = document.body.innerText; " + "const m = t.match(/Invalid[^\\n]*|Incorrect[^\\n]*/i); " + "return m ? m[0] : null; }" + ) + raise AdapterError(err or "login redirected back to /login (auth failed)") def submit_draft(self, page, ctx: AdapterContext) -> dict[str, Any]: page.goto(self.EDITOR_URL, wait_until="domcontentloaded") @@ -303,20 +327,26 @@ def submit_draft(self, page, ctx: AdapterContext) -> dict[str, Any]: class DZoneAdapter: - """DZone — email/password login, TinyMCE-based article editor. + """DZone — AngularJS login form gated by invisible reCAPTCHA. 
- Selectors below have NOT been validated against the live site. The body - field is inside a TinyMCE iframe; the adapter pastes the markdown as - plain text and the editor will need a manual cleanup pass before publish. + Password-based login does not work from Playwright: DZone's doLogin() + requires a reCAPTCHA token (visible in scope.credentials.recaptchaToken) + that Google's invisible reCAPTCHA does not issue to headless browsers. + The auth request is never sent and login fails silently. + + Use the storage-state path: export your logged-in DZone session from + a real browser and pass it as DZONE_STORAGE_STATE (base64-encoded JSON, + same shape as MEDIUM_STORAGE_STATE — generate it with + scripts/website/export_storage_state.py --site dzone + --from-firefox-profile). + + Selectors below for the article editor have NOT been validated against + the live site. """ name = "dzone" - LOGIN_URL = "https://dzone.com/users/login.html" EDITOR_URL = "https://dzone.com/articles/new" - USER_SELECTORS = ["input[name='username']", "input[type='email']"] - PASSWORD_SELECTORS = ["input[name='password']", "input[type='password']"] - SUBMIT_SELECTORS = ["button[type='submit']", "button:has-text('Log in')"] TITLE_SELECTORS = ["input[name='title']", "input#title"] BODY_IFRAME_SELECTORS = ["iframe.tox-edit-area__iframe", "iframe[id$='_ifr']"] CANONICAL_SELECTORS = [ @@ -333,14 +363,14 @@ class DZoneAdapter: @staticmethod def is_configured() -> bool: - return bool(os.environ.get("DZONE_USER") and os.environ.get("DZONE_PASSWORD")) + return bool(os.environ.get("DZONE_STORAGE_STATE")) def login(self, page) -> None: - page.goto(self.LOGIN_URL, wait_until="domcontentloaded") - _find_first(page, self.USER_SELECTORS).fill(os.environ["DZONE_USER"]) - _find_first(page, self.PASSWORD_SELECTORS).fill(os.environ["DZONE_PASSWORD"]) - _find_first(page, self.SUBMIT_SELECTORS).click() - page.wait_for_load_state("networkidle", timeout=20000) + # Storage state is loaded 
into the browser context up-front; nothing + # to do here. If the cookies have expired the editor page will bounce + # back to login and the editor selectors will time out — at which + # point the user needs to refresh DZONE_STORAGE_STATE. + return def submit_draft(self, page, ctx: AdapterContext) -> dict[str, Any]: page.goto(self.EDITOR_URL, wait_until="domcontentloaded") @@ -422,11 +452,7 @@ def is_configured() -> bool: @staticmethod def storage_state_path() -> Path: - encoded = os.environ["MEDIUM_STORAGE_STATE"] - decoded = base64.b64decode(encoded) - path = Path("/tmp/medium-storage-state.json") - path.write_bytes(decoded) - return path + return _load_base64_storage_state("MEDIUM_STORAGE_STATE") def login(self, page) -> None: # No-op: storage state was loaded into the browser context already. @@ -491,6 +517,8 @@ def run_adapter(adapter, post: Post, body_markdown: str, headed: bool, validate_ } if isinstance(adapter, MediumAdapter): context_kwargs["storage_state"] = str(MediumAdapter.storage_state_path()) + elif isinstance(adapter, DZoneAdapter): + context_kwargs["storage_state"] = str(_load_base64_storage_state("DZONE_STORAGE_STATE")) context = browser.new_context(**context_kwargs) # Grant clipboard access so navigator.clipboard.writeText() succeeds. From 02a92363aa155efb2bfa1b727b99a11b927a2e26 Mon Sep 17 00:00:00 2001 From: Shai Almog <67850168+shai-almog@users.noreply.github.com> Date: Wed, 6 May 2026 13:47:28 +0300 Subject: [PATCH 09/17] foojay draft polish: sidebar, categories, tags, featured image Three groups of changes based on review of foojay draft #123656: 1. Body rendering (applies to all syndication targets): - Strip the trailing Hugo "## Discussion" + giscus shortcode block so the syndicated copy ends at the actual article body. - Strip any remaining {{< shortcode >}} forms. - Replace the markdown-blockquote "What is Codename One?" with an HTML