diff --git a/CLAUDE.md b/CLAUDE.md index 55f89a5..db93014 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -58,6 +58,20 @@ uv run pytest tests/ -v No exceptions. +## Versioning & Releases + +Use **semantic-release** to manage versions. Prefer **patch bumps (+0.0.1)** as much as possible — reserve minor (+0.1.0) for genuinely new user-facing features and major (+1.0.0) for breaking changes only. + +Commit message conventions that drive the bump: + +- `fix:` → patch (+0.0.1) — **default choice** for most changes +- `docs:` → patch (+0.0.1) — documentation updates +- `chore:` → patch (+0.0.1) — maintenance, tooling, deps +- `feat:` → minor (+0.1.0) — only for meaningful new features +- `BREAKING CHANGE:` footer or `!` suffix → major (+1.0.0) — only for breaking API changes + +When in doubt, frame the change as `fix:`, `docs:`, or `chore:` to keep the bump at patch level. + ## Architecture **Core Components:** diff --git a/cookbook/company-info/scrapegraph_sdk.ipynb b/cookbook/company-info/scrapegraph_sdk.ipynb index cf207c9..7094746 100644 --- a/cookbook/company-info/scrapegraph_sdk.ipynb +++ b/cookbook/company-info/scrapegraph_sdk.ipynb @@ -6,9 +6,7 @@ "id": "jEkuKbcRrPcK" }, "source": [ - "## \ud83d\udd77\ufe0f Extract Company Info with Official Scrapegraph SDK\n", - "\n", - "[](https://www.runalph.ai/notebooks/scrapegraphai/scrapegraph-sdk) [](https://colab.research.google.com/drive/12d7LycLAYO2bFsBo_jtPHXSaIg7AqR3O?usp=sharing)" + "## 🕷️ Extract Company Info with Official Scrapegraph SDK\n" ] }, { @@ -26,7 +24,7 @@ "id": "IzsyDXEWwPVt" }, "source": [ - "### \ud83d\udd27 Install `dependencies`" + "### 🔧 Install `dependencies`" ] }, { @@ -47,7 +45,7 @@ "id": "apBsL-L2KzM7" }, "source": [ - "### \ud83d\udd11 Import `ScrapeGraph` API key" + "### 🔑 Import `ScrapeGraph` API key" ] }, { @@ -85,7 +83,7 @@ "output_type": "stream", "text": [ "SGAI_API_KEY not found in environment.\n", - "Please enter your SGAI_API_KEY: \u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\n", + "Please enter your SGAI_API_KEY: ··········\n", "SGAI_API_KEY has been set in the environment.\n" ] } @@ -104,7 +102,7 @@ "id": "jnqMB2-xVYQ7" }, "source": [ - "### \ud83d\udcdd Defining an `Output Schema` for Webpage Content Extraction\n" + "### 📝 Defining an `Output Schema` for Webpage Content Extraction\n" ] }, { @@ -198,7 +196,7 @@ "outputs": [], "source": [ "from pydantic import BaseModel, Field\n", - "from typing import List, Dict, Optional\n", + "from typing import List\n", "\n", "# Schema for founder information\n", "class FounderSchema(BaseModel):\n", @@ -239,7 +237,7 @@ "id": "cDGH0b2DkY63" }, "source": [ - "### \ud83d\ude80 Initialize `SGAI Client` and start extraction" + "### 🚀 Initialize `SGAI Client` and start extraction" ] }, { @@ -420,7 +418,7 @@ "id": "2as65QLypwdb" }, "source": [ - "### \ud83d\udcbe Save the output to a `CSV` file" + "### 💾 Save the output to a `CSV` file" ] }, { @@ -1885,7 +1883,7 @@ "id": "-1SZT8VzTZNd" }, "source": [ - "## \ud83d\udd17 Resources" + "## 🔗 Resources" ] }, { @@ -1901,13 +1899,13 @@ "
\n", "\n", "\n", - "- \ud83d\ude80 **Get your API Key:** [ScrapeGraphAI Dashboard](https://dashboard.scrapegraphai.com) \n", - "- \ud83d\udc19 **GitHub:** [ScrapeGraphAI GitHub](https://github.com/scrapegraphai) \n", - "- \ud83d\udcbc **LinkedIn:** [ScrapeGraphAI LinkedIn](https://www.linkedin.com/company/scrapegraphai/) \n", - "- \ud83d\udc26 **Twitter:** [ScrapeGraphAI Twitter](https://twitter.com/scrapegraphai) \n", - "- \ud83d\udcac **Discord:** [Join our Discord Community](https://discord.gg/uJN7TYcpNa) \n", + "- 🚀 **Get your API Key:** [ScrapeGraphAI Dashboard](https://dashboard.scrapegraphai.com) \n", + "- 🐙 **GitHub:** [ScrapeGraphAI GitHub](https://github.com/scrapegraphai) \n", + "- 💼 **LinkedIn:** [ScrapeGraphAI LinkedIn](https://www.linkedin.com/company/scrapegraphai/) \n", + "- 🐦 **Twitter:** [ScrapeGraphAI Twitter](https://twitter.com/scrapegraphai) \n", + "- 💬 **Discord:** [Join our Discord Community](https://discord.gg/uJN7TYcpNa) \n", "\n", - "Made with \u2764\ufe0f by the [ScrapeGraphAI](https://scrapegraphai.com) Team \n" + "Made with ❤️ by the [ScrapeGraphAI](https://scrapegraphai.com) Team \n" ] } ], diff --git a/cookbook/github-trending/scrapegraph_sdk.ipynb b/cookbook/github-trending/scrapegraph_sdk.ipynb index ee762e4..1d38d0c 100644 --- a/cookbook/github-trending/scrapegraph_sdk.ipynb +++ b/cookbook/github-trending/scrapegraph_sdk.ipynb @@ -6,9 +6,7 @@ "id": "jEkuKbcRrPcK" }, "source": [ - "## \ud83d\udd77\ufe0f Extract Github Trending Repositories with Official Scrapegraph SDK\n", - "\n", - "[](https://www.runalph.ai/notebooks/scrapegraphai/scrapegraph-sdk-1) [](https://colab.research.google.com/drive/1SSsI0naieeHjHcJkW22CqNIZwLHWjXmd?usp=sharing)" + "## 🕷️ Extract Github Trending Repositories with Official Scrapegraph SDK\n" ] }, { @@ -26,7 +24,7 @@ "id": "IzsyDXEWwPVt" }, "source": [ - "### \ud83d\udd27 Install `dependencies`" + "### 🔧 Install `dependencies`" ] }, { @@ -47,7 +45,7 @@ "id": "apBsL-L2KzM7" }, "source": [ - "### \ud83d\udd11 Import `ScrapeGraph` API key" + "### 🔑 Import `ScrapeGraph` API key" ] }, { @@ -81,11 +79,11 @@ }, "outputs": [ { - "name": "stdin", + "name": "stdout", "output_type": "stream", "text": [ "Scrapegraph API key:\n", - " \u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\n" + " ········\n" ] } ], @@ -103,7 +101,7 @@ "id": "jnqMB2-xVYQ7" }, "source": [ - "### \ud83d\udcdd Defining an `Output Schema` for Webpage Content Extraction\n" + "### 📝 Defining an `Output Schema` for Webpage Content Extraction\n" ] }, { @@ -190,7 +188,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "id": "dlrOEgZk_8V4" }, @@ -221,7 +219,7 @@ "id": "cDGH0b2DkY63" }, "source": [ - "### \ud83d\ude80 Initialize `SGAI Client` and start extraction" + "### 🚀 Initialize `SGAI Client` and start extraction" ] }, { @@ -2532,7 +2530,7 @@ "id": "2as65QLypwdb" }, "source": [ - "### \ud83d\udcbe Save the output to a `CSV` file" + "### 💾 Save the output to a `CSV` file" ] }, { @@ -2689,7 +2687,7 @@ "275 rows \u00d7 6 columns
\n", + "275 rows × 6 columns
\n", "" ], "text/plain": [ @@ -2725,7 +2723,7 @@ "271 Self-evolving agent: grows skill tree from 3.3... 10714 1205 \n", "272 Unlimited FREE AI coding. Connect Claude Code,... 7644 1262 \n", "273 The agent harness performance optimization sys... 178795 27573 \n", - "274 \ud83d\udcda \u300a\u4ece\u96f6\u5f00\u59cb\u6784\u5efa\u667a\u80fd\u4f53\u300b----\u4ece\u96f6\u5f00\u59cb\u7684\u667a\u80fd\u4f53\u539f\u7406\u4e0e\u5b9e\u8df5\u6559\u7a0b 47116 5669 \n", + "274 📚 《从零开始构建智能体》----从零开始的智能体原理与实践教程 47116 5669 \n", "\n", " today_stars language \n", "0 0 No content available \n", @@ -2807,7 +2805,7 @@ "id": "-1SZT8VzTZNd" }, "source": [ - "## \ud83d\udd17 Resources" + "## 🔗 Resources" ] }, { @@ -2823,13 +2821,13 @@ "\n", "\n", "\n", - "- \ud83d\ude80 **Get your API Key:** [ScrapeGraphAI Dashboard](https://scrapegraphai.com/dashboard) \n", - "- \ud83d\udc19 **GitHub:** [ScrapeGraphAI GitHub](https://github.com/scrapegraphai) \n", - "- \ud83d\udcbc **LinkedIn:** [ScrapeGraphAI LinkedIn](https://www.linkedin.com/company/scrapegraphai/) \n", - "- \ud83d\udc26 **Twitter:** [ScrapeGraphAI Twitter](https://twitter.com/scrapegraphai) \n", - "- \ud83d\udcac **Discord:** [Join our Discord Community](https://discord.gg/uJN7TYcpNa) \n", + "- 🚀 **Get your API Key:** [ScrapeGraphAI Dashboard](https://scrapegraphai.com/dashboard) \n", + "- 🐙 **GitHub:** [ScrapeGraphAI GitHub](https://github.com/scrapegraphai) \n", + "- 💼 **LinkedIn:** [ScrapeGraphAI LinkedIn](https://www.linkedin.com/company/scrapegraphai/) \n", + "- 🐦 **Twitter:** [ScrapeGraphAI Twitter](https://twitter.com/scrapegraphai) \n", + "- 💬 **Discord:** [Join our Discord Community](https://discord.gg/uJN7TYcpNa) \n", "\n", - "Made with \u2764\ufe0f by the [ScrapeGraphAI](https://scrapegraphai.com) Team \n" + "Made with ❤️ by the [ScrapeGraphAI](https://scrapegraphai.com) Team \n" ] } ], diff --git a/cookbook/wired-news/scrapegraph_sdk.ipynb b/cookbook/wired-news/scrapegraph_sdk.ipynb new file mode 100644 index 0000000..73fbe0b --- /dev/null +++ b/cookbook/wired-news/scrapegraph_sdk.ipynb @@ -0,0 +1,880 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "jEkuKbcRrPcK" + }, + "source": [ + "## 🕷️ Extract Wired Science News with Official Scrapegraph SDK" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6Rgz5T4eHBVz" + }, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IzsyDXEWwPVt" + }, + "source": [ + "### 🔧 Install `dependencies`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "os_vm0MkIxr9" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!pip install scrapegraph-py" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "apBsL-L2KzM7" + }, + "source": [ + "### 🔑 Import `ScrapeGraph` API key" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ol9gQbAFkh9b" + }, + "source": [ + "You can find the Scrapegraph API key [here](https://scrapegraphai.com/dashboard)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 17535, + "status": "ok", + "timestamp": 1734531168564, + "user": { + "displayName": "ScrapeGraphAI", + "userId": "10474323355016263615" + }, + "user_tz": -60 + }, + "id": "sffqFG2EJ8bI", + "outputId": "bcdd9ae1-151e-41d6-df36-8835c660460f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SGAI_API_KEY not found in environment.\n", + "Please enter your SGAI_API_KEY: ··········\n", + "SGAI_API_KEY has been set in the environment.\n" + ] + } + ], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "if not os.environ.get(\"SGAI_API_KEY\"):\n", + " os.environ[\"SGAI_API_KEY\"] = getpass.getpass(\"Scrapegraph API key:\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jnqMB2-xVYQ7" + }, + "source": [ + "### 📝 Defining an `Output Schema` for Webpage Content Extraction\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VZvxbjfXvbgd" + }, + "source": [ + "If you already know what you want to extract from a webpage, you can **define an output schema** using **Pydantic**. This schema acts as a \"blueprint\" that tells the AI how to structure the response.\n", + "\n", + "| \n", + " | category | \n", + "title | \n", + "link | \n", + "author | \n", + "
|---|---|---|---|---|
| 0 | \n", + "Science | \n", + "The Study That Called Out Black Plastic Utensi... | \n", + "https://www.wired.com/story/black-plastic-uten... | \n", + "Beth Mole, Ars Technica | \n", + "
| 1 | \n", + "Environment | \n", + "Generative AI and Climate Change Are on a Coll... | \n", + "https://www.wired.com/story/true-cost-generati... | \n", + "Sasha Luccioni | \n", + "
| 2 | \n", + "Xenotransplantation | \n", + "A Third Person Has Received a Transplant of a ... | \n", + "https://www.wired.com/story/a-third-person-has... | \n", + "Emily Mullin | \n", + "
| 3 | \n", + "Health | \n", + "Antibodies Could Soon Help Slow the Aging Process | \n", + "https://www.wired.com/story/antibodies-could-s... | \n", + "Andrew Steele | \n", + "
| 4 | \n", + "Science | \n", + "Good at Reading? Your Brain May Be Structured ... | \n", + "https://www.wired.com/story/good-at-reading-yo... | \n", + "Mikael Roll | \n", + "
| 5 | \n", + "Health | \n", + "Mega-Farms Are Driving the Threat of Bird Flu | \n", + "https://www.wired.com/story/mega-farms-are-dri... | \n", + "Georgina Gustin | \n", + "
| 6 | \n", + "Health | \n", + "RFK Plans to Take on Big Pharma. It’s Easier S... | \n", + "https://www.wired.com/story/rfks-plan-to-take-... | \n", + "Emily Mullin | \n", + "
| 7 | \n", + "Environment | \n", + "How Christmas Trees Could Become a Source of L... | \n", + "https://www.wired.com/story/how-christmas-tree... | \n", + "Alexa Phillips | \n", + "
| 8 | \n", + "Environment | \n", + "Creating a Global Package to Solve the Problem... | \n", + "https://www.wired.com/story/global-plastics-tr... | \n", + "Susan Solomon | \n", + "
| 9 | \n", + "Environment | \n", + "These 3 Things Are Standing in the Way of a Gl... | \n", + "https://www.wired.com/story/these-3-things-are... | \n", + "Steve Fletcher and Samuel Winton | \n", + "
\n",
+ "
\n",
+ "