diff --git a/.github/workflows/resize-images.yml b/.github/workflows/resize-images.yml index 514a53fc67..4ee607eb6a 100644 --- a/.github/workflows/resize-images.yml +++ b/.github/workflows/resize-images.yml @@ -8,6 +8,7 @@ on: jobs: resize-images: + if: github.repository == 'ArmDeveloperEcosystem/arm-learning-paths' runs-on: ubuntu-latest steps: - name: Checkout repository @@ -19,7 +20,7 @@ jobs: id: changed-files run: | # Find all image files that changed since last Monday - CHANGED_IMAGES=$(git log --since="7 days ago" --name-only --pretty="" \ + CHANGED_IMAGES=$(git log --since="10 days ago" --name-only --pretty="" \ -- '*.jpg' '*.jpeg' '*.png' '*.JPG' '*.JPEG' '*.PNG' | sort -u | tr '\n' ' ') echo "changed_images=$CHANGED_IMAGES" >> $GITHUB_OUTPUT diff --git a/.wordlist.txt b/.wordlist.txt index cd0fdcc8db..66e0803c18 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -5288,4 +5288,99 @@ menuconfig oss saas todo -yq \ No newline at end of file +yq +AIoT +AQ +BSPs +BatchMatMul +Bitnami +CIX +Deconvolution +EdgeBloXagent +FFI +FLOPs +FtVjdW +FullyConnected +Guo +HgJyHlhCVQrqvpGdCcaf +InfluxDB +IntelliJ +IqNP +JetBrains +Jiaming +Jinja +LiteRT's +MPAC +MergeTree +MicroPac +MicroPacFile +MicroPacs +MicroStack +Micropac +NeuroStack +NodeSource +Podman +PyCharm +QD +RNiiiIPCSUA +RabbitMQ +Radxa +Regardles +Slurm +StrongPassword +TCO +TinkerBlox +Tinkerblox +TransposeConv +UltraEdge +Ultraedge +Uncloud +WebStorm +WhatsApp +XNNPACK's +applicationType +bded +binfmt +binfmts +buildSteps +centre +createdBy +dFamiliarity +df +django +ernie +ffn +fungibility +gemini +getstarted +ize +jinja +mcpserver +microboost +micropac +mlBZgDFc +moe +mpac +neuroboost +pathbreaking +pika +qyM +rabbitmq +rabbitmqadmin +reachability +reflash +relevent +slurm +sr +subtasks +sv +sysreport +tA +templated +tinkerblox +tinkerbloxdev +ultraedge +unclound +wNdi +whatsapp +workdir \ No newline at end of file diff --git a/assets/contributors.csv b/assets/contributors.csv index bc7e464355..69b9b68be3 100644 --- a/assets/contributors.csv +++ b/assets/contributors.csv @@ -111,3 +111,4 @@ Yahya Abouelseoud,Arm,,,, Éliás Bálint,Arm,,,, Steve Suzuki,Arm,,,, Qixiang Xu,Arm,,,, +Phalani Paladugu,Arm,phalani-paladugu,phalani-paladugu,, diff --git a/content/install-guides/_images/mcp-server-install.png b/content/install-guides/_images/mcp-server-install.png new file mode 100644 index 0000000000..e24629bb75 Binary files /dev/null and b/content/install-guides/_images/mcp-server-install.png differ diff --git a/content/install-guides/_images/new-mcp-server-tools.png b/content/install-guides/_images/new-mcp-server-tools.png new file mode 100644 index 0000000000..f186c7743d Binary files /dev/null and b/content/install-guides/_images/new-mcp-server-tools.png differ diff --git a/content/install-guides/gemini.md b/content/install-guides/gemini.md new file mode 100644 index 0000000000..5899c3bacd --- /dev/null +++ b/content/install-guides/gemini.md @@ -0,0 +1,395 @@ +--- +title: Gemini CLI + +draft: true + +author: Jason Andrews +minutes_to_complete: 15 +official_docs: https://ai.google.dev/gemini-api/docs/cli + +test_maintenance: true +test_images: +- ubuntu:latest + +layout: installtoolsall +multi_install: false +multitool_install_part: false +tool_install: true +weight: 1 +--- + +Gemini CLI is Google's command-line interface for interacting with the Gemini AI assistant. You can use it to ask questions about software development, architecture, and general programming tasks with advanced AI capabilities. 
+ +It supports multiple operating systems, including Arm Linux distributions and macOS, and provides powerful AI assistance for developers working on Arm platforms. + +This guide explains how to install Gemini CLI on macOS and Arm Linux. + +## What should I do before installing Gemini CLI? + +You need a Google account to use Gemini CLI. If you don't have one, visit [Google Account Creation](https://accounts.google.com/signup) to create your account. + +You'll also need to set up authentication for the Gemini API. Gemini CLI supports multiple authentication methods, with Google OAuth login being the recommended approach for most users. + +## How do I set up authentication for Gemini CLI? + +Gemini CLI offers three authentication methods. Choose the one that best fits your needs. + +### Option 1: Google OAuth login + +This is your Google account, and is the easiest method for most users. After installing Gemini CLI, run the tool and select "Login with Google" when prompted. This opens your browser for authentication. + +Benefits of using Google OAuth: +- Free tier includes 60 requests per minute and 1,000 requests per day +- Access to Gemini with 1M token context window +- No manual API key management required + +### Option 2: Gemini API key + +If you prefer using an API key, you can generate one from Google AI Studio. + +To get your API key: +1. Visit [Google AI Studio](https://aistudio.google.com/apikey) +2. Sign in with your Google account +3. Click "Create API Key" +4. Copy the generated key + +Set the API key in your environment: + +```bash +export GEMINI_API_KEY="YOUR_API_KEY" +``` + +Benefits of using an API key: +- Free tier includes 250 requests per day +- Works well for automated scripts and CI/CD environments +- No interactive authentication required + +### Option 3: Vertex AI for enterprise users + +For enterprise users with Google Cloud accounts, you can use Vertex AI authentication. + +Set up Vertex AI authentication: + +```bash +export GOOGLE_API_KEY="YOUR_GOOGLE_CLOUD_API_KEY" +export GOOGLE_GENAI_USE_VERTEXAI=true +``` + +Benefits of using Vertex AI: +- Enterprise-grade features and support +- Integration with Google Cloud billing and management +- Higher rate limits and advanced features + +## How do I install Gemini CLI on macOS? + +The easiest way to install Gemini CLI on macOS is using Homebrew, which handles all dependencies automatically. + +### Install Gemini CLI on macOS using Homebrew + +Yes, you can install [Homebrew](https://brew.sh/) if it isn't already available on your computer. + +Install Gemini CLI using Homebrew: + +```console +brew install gemini-cli +``` + +This installs Gemini CLI and automatically handles the Node.js dependency. The Homebrew version is currently at 0.19.4 (stable) and receives regular updates. + +### Install Gemini CLI on macOS using npm + +Yes, if you prefer to use npm or need the latest version, you can install Gemini CLI globally using npm. + +First, make sure you have Node.js version 20 or higher installed. Install Node.js using Homebrew: + +```console +brew install node +``` + +Verify Node.js is installed correctly: + +```console +node --version +``` + +The output should show version 20 or higher: + +```output +v25.2.1 +``` + +Install Gemini CLI globally using npm: + +```console +npm install -g @google/gemini-cli +``` + +This installs the latest version (for example, 0.20.0) directly from npm. Homebrew can lag behind npm, so versions may differ. + +### How do I confirm Gemini CLI is working on macOS? 
+ +You now have Gemini CLI installed on your macOS system. + +Confirm the CLI is available by checking the version: + +```console +gemini --version +``` + +The output shows the version: + +```output +0.20.0 +``` + +Start an interactive session to test basic functionality: + +```console +gemini +``` + +This opens the Gemini CLI interface where you can authenticate and start asking questions. On first run, you'll be prompted to choose your authentication method. + +## How do I install Gemini CLI on Arm Linux? + +You can install Gemini CLI on Arm Linux distributions using npm. This method works on all major Arm Linux distributions including Ubuntu, Debian, CentOS, and others. + +### What packages do I need before installing Gemini CLI on Arm Linux? + +Before installing Gemini CLI, install prerequisite packages and Node.js. + +Install the required packages on Ubuntu/Debian systems: + +```bash +sudo apt update && sudo apt install -y curl +``` + +If you are not using Ubuntu/Debian use your package manager to install curl. + +### How do I install Node.js on Arm Linux? + +Gemini CLI requires Node.js version 20 or higher. The easiest way to install Node.js on Arm Linux is using the NodeSource repository. + +Download and run the Node.js 20.x setup script: + +```bash +curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash - +``` + +Verify Node.js is installed correctly: + +```bash +node --version +``` + +The output should show version 20 or higher: + +```output +v20.19.4 +``` + +Verify npm is available: + +```bash +npm --version +``` + +The output shows the npm version: + +```output +10.2.4 +``` + +### How do I install Gemini CLI using npm on Arm Linux? + +With Node.js installed, install Gemini CLI globally using npm. + +Install Gemini CLI globally: + +```bash +sudo npm install -g @google/gemini-cli +``` + +This downloads and installs the latest version of Gemini CLI. The installation may take a few minutes as it downloads dependencies. + +### How do I confirm Gemini CLI is working on Arm Linux? + +You now have Gemini CLI installed on your Arm Linux system. + +Confirm the CLI is available by checking the version: + +```bash +gemini --version +``` + +The output shows the version: + +```output +0.20.0 +``` + +### How do I view the available command-line options? + +To print the available commands and options, use the `--help` flag: + +```bash +gemini --help +``` + +The output shows the available commands and options: + +```output +Usage: gemini [options] [command] + +Gemini CLI - Launch an interactive CLI, use -p/--prompt for non-interactive mode + +Commands: + gemini [query..] Launch Gemini CLI [default] + gemini mcp Manage MCP servers + gemini extensions Manage Gemini CLI extensions. [aliases: extension] + +Positionals: + query Positional prompt. Defaults to one-shot; use -i/--prompt-interactive for interactive. + +Options: + -d, --debug Run in debug mode? [boolean] [default: false] + -m, --model Model [string] + -p, --prompt Prompt. Appended to input on stdin (if any). + [deprecated: Use the positional prompt instead. This flag will be removed in a future version.] [string] + -i, --prompt-interactive Execute the provided prompt and continue in interactive mode [string] + -s, --sandbox Run in sandbox? [boolean] + -y, --yolo Automatically accept all actions (aka YOLO mode, see https://www.youtube.com/watch?v=xvFZjo5PgG0 + for more details)? 
[boolean] [default: false] + --approval-mode Set the approval mode: default (prompt for approval), auto_edit (auto-approve edit tools), yolo + (auto-approve all tools) [string] [choices: "default", "auto_edit", "yolo"] + --experimental-acp Starts the agent in ACP mode [boolean] + --allowed-mcp-server-names Allowed MCP server names [array] + --allowed-tools Tools that are allowed to run without confirmation [array] + -e, --extensions A list of extensions to use. If not provided, all extensions are used. [array] + -l, --list-extensions List all available extensions and exit. [boolean] + -r, --resume Resume a previous session. Use "latest" for most recent or index number (e.g. --resume 5) + [string] + --list-sessions List available sessions for the current project and exit. [boolean] + --delete-session Delete a session by index number (use --list-sessions to see available sessions). [string] + --include-directories Additional directories to include in the workspace (comma-separated or multiple + --include-directories) [array] + --screen-reader Enable screen reader mode for accessibility. [boolean] + -o, --output-format The format of the CLI output. [string] [choices: "text", "json", "stream-json"] + -v, --version Show version number [boolean] + -h, --help Show help +``` + +Your Gemini CLI installation on Arm Linux is now complete and ready to use. + +## How do I configure context for Arm development? + +Context configuration allows you to provide Gemini with persistent information about your development environment, preferences, and project details. This helps Gemini give more relevant and tailored responses for Arm architecture development. + +### How do I create a context file for Gemini CLI? + +Gemini CLI looks for context files in your home directory's `.gemini` configuration folder. Create this directory and add your context file. + +Create the Gemini configuration directory: + +```bash +mkdir -p ~/.gemini +``` + +Create a context file with Arm development information: + +```bash +cat > ~/.gemini/GEMINI.md << 'EOF' +I am an Arm Linux developer. I prefer Ubuntu and other Debian based distributions. I don't use any x86 computers so please provide all information assuming I'm working on Arm Linux. Sometimes I use macOS and Windows on Arm, but please only provide information about these operating systems when I ask for it. +EOF +``` + +This creates a context file that tells Gemini about your Arm development focus and preferences. + +### How do I verify that context is being loaded? + +Verify that Gemini is loading your context file by starting a chat session and asking a development question. + +Start Gemini CLI: + +```console +gemini +``` + +Ask a question that should trigger context-aware responses: + +```output +How do I install gcloud? +``` + +If context is loaded correctly, Gemini should provide Arm-specific recommendations. + +## How do I integrate the Arm MCP server with Gemini CLI? + +The Arm MCP (Model Context Protocol) server provides Gemini CLI with specialized tools and knowledge for Arm architecture development, migration, and optimization. By integrating the Arm MCP server, you gain access to Arm-specific documentation, code analysis tools, and optimization recommendations directly through your Gemini conversations. + +### How do I set up the Arm MCP server with Docker? + +The Arm MCP server runs as a Docker container that Gemini CLI connects to via the Model Context Protocol. You need Docker installed on your system to use the MCP server. 
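+
+If you are not sure whether Docker is already set up, you can run a quick check before continuing (this is a standard Docker command, not specific to the Arm MCP server):
+
+```console
+docker ps
+```
+
+If the command reports an error about the Docker daemon, Docker is either not installed or not running.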
+ +First, ensure Docker is installed and running on your system. Install Docker by following the [Docker installation guide](/install-guides/docker/). + +Pull the Arm MCP server Docker image: + +```console +docker pull armlimited/arm-mcp:latest +``` + +### How do I configure Gemini CLI to use the Arm MCP server? + +Gemini CLI uses a configuration file to connect to MCP servers. Create or update this configuration to include the Arm MCP server. + +Use an editor to modify the file `~/.gemini/settings.json` to add an MCP object. + +You may have other objects already in the file so make sure to use a `,` at the end of each object that is not the last one. For example, the code below shows both the `security` and the `mcpServers` objects. + +```json +{ + "security": { + "auth": { + "selectedType": "oauth-personal" + } + }, + "mcpServers": { + "arm_mcp_server": { + "command": "docker", + "args": [ + "run", + "--rm", + "-i", + "-v", "/Users/yourname01/yourlocalcodebase:/workspace", + "--name", "arm-mcp", + "armlimited/arm-mcp:latest" + ], + "env": {}, + "timeout": 60000 + } + } +} +``` + +This configuration tells Gemini CLI to connect to the Arm MCP server running in the Docker container. + +### How do I verify the Arm MCP server is working? + +Start the Gemini CLI and list the tools from the MCP server to verify it's working: + +```console +gemini +``` + +Use the `/tools` command to list the available tools: + +```console +/tools +``` + +You should see the Arm MCP server tools listed in the output. If the arm-mcp server says it's still loading, wait a moment and run `/tools` again. + +If you're facing issues or have questions, reach out to mcpserver@arm.com . + +You're ready to use the Gemini CLI. \ No newline at end of file diff --git a/content/install-guides/github-copilot.md b/content/install-guides/github-copilot.md new file mode 100644 index 0000000000..2ae0e0a2d4 --- /dev/null +++ b/content/install-guides/github-copilot.md @@ -0,0 +1,338 @@ +--- +title: GitHub Copilot + +draft: true + +author: Pareena Verma +minutes_to_complete: 10 +official_docs: https://docs.github.com/en/copilot + +test_maintenance: true +test_images: +- ubuntu:latest + +layout: installtoolsall +multi_install: false +multitool_install_part: false +tool_install: true +weight: 1 +--- + +GitHub Copilot is an AI coding assistant that helps you write code faster and with less effort. It suggests whole lines or entire functions based on your comments and code context. + +GitHub Copilot works seamlessly on Arm-based systems, including Linux distributions running on Arm servers, macOS on Apple Silicon, and Windows on Arm devices. + +This guide focuses on installing GitHub Copilot in Visual Studio Code. GitHub Copilot also supports other editors including JetBrains IDEs (IntelliJ IDEA, PyCharm, WebStorm), Neovim, and others. Visit the [official GitHub Copilot documentation](https://docs.github.com/en/copilot) for installation instructions for other editors. + +## What should I do before installing GitHub Copilot? + +You need a GitHub account with an active GitHub Copilot subscription to use GitHub Copilot. + +If you don't have a GitHub account, visit [GitHub](https://github.com/) and sign up. + +To subscribe to GitHub Copilot, visit [GitHub Copilot pricing](https://github.com/features/copilot/plans) and choose a plan that fits your needs. GitHub Copilot offers individual, business, and enterprise plans, plus a free tier for verified students, teachers, and maintainers of popular open source projects. 
+ +## How do I install GitHub Copilot in Visual Studio Code? + +Visual Studio Code is one of the most popular editors for using GitHub Copilot, and it works natively on Arm systems. + +### Install Visual Studio Code + +If you don't have Visual Studio Code installed, download and install it from the [offical download page]((https://code.visualstudio.com/download) for your operating system: + +- macOS (Apple Silicon): Download the the Apple Silicon version +- Linux (Arm): Download the Arm64 `.deb` or `.rpm` package +- Windows on Arm: Download the Arm64 installer + +For Linux, you can install VS Code using the package manager. On Ubuntu and Debian-based distributions: + +```bash { target="ubuntu:latest" } +curl -L "https://code.visualstudio.com/sha/download?build=stable&os=linux-deb-arm64" -o vscode-arm64.deb +sudo dpkg -i vscode-arm64.deb +sudo apt-get install -f +``` + +### Install the GitHub Copilot extension + +Open Visual Studio Code and install the GitHub Copilot extension: + +1. Open VS Code +2. Select the Extensions view by selecting the Extensions icon in the Activity Bar on the left side or pressing `Ctrl+Shift+X` (Windows/Linux) or `Cmd+Shift+X` (macOS) +3. Search for "GitHub Copilot" +4. Select **Install** on the "GitHub Copilot" extension by GitHub + +Alternatively, install from the command line: + +```console +code --install-extension GitHub.copilot +``` + +### Install the GitHub Copilot Chat extension + +For an enhanced experience with conversational AI assistance, install the GitHub Copilot Chat extension: + +```console +code --install-extension GitHub.copilot-chat +``` + +Or search for "GitHub Copilot Chat" in the Extensions view and install it. + +### Sign in to GitHub Copilot + +After installing the extension, you need to authorize it: + +1. In VS Code, select the GitHub Copilot icon in the status bar (bottom right) +2. Select **Sign in to GitHub** +3. Follow the prompts to authorize the extension in your browser +4. Return to VS Code to complete the setup + +## How do I confirm GitHub Copilot is working? + +You now have GitHub Copilot installed in Visual Studio Code. + +Confirm it's working by testing code suggestions: + +1. In VSCode, create a new file by selecting **File** → **New File** or pressing `Ctrl+N` (Windows/Linux) or `Cmd+N` (macOS) +2. Save the file with a programming language extension by selecting **File** → **Save As** (for example, `test.py` for Python, `test.js` for JavaScript, or `test.go` for Go) +3. In the VS Code editor window, type a comment describing a function: + - For Python: `# Function to calculate fibonacci numbers` + - For JavaScript: `// Function to calculate fibonacci numbers` + - For Go: `// Function to calculate fibonacci numbers` +4. Press Enter to start a new line + +GitHub Copilot analyzes your comment and suggests code. The suggestion appears as gray text in the editor. + +5. Press Tab to accept the suggestion, or press Esc to dismiss it +6. Continue typing to see more suggestions as you work + +If you don't see suggestions, check that: +- You're working in the Visual Studio Code editor window (not a terminal) +- The GitHub Copilot icon in the status bar (bottom right) shows it's active +- You're signed in to your GitHub account +- Your subscription is active + +## What are the different GitHub Copilot modes? + +GitHub Copilot Chat in Visual Studio Code offers three modes to help you work with code in different ways: Agent, Edit, and Ask modes. Each mode is designed for specific tasks. 
+ +### Agent Mode + +Agent Mode enables GitHub Copilot to take autonomous actions in your workspace. In this mode, Copilot can: + +- Read and analyze multiple files in your project +- Make changes across different files +- Create new files and directories +- Execute tasks that require understanding your entire codebase + +To use Agent Mode: + +1. Open the GitHub Copilot Chat panel by selecting the chat icon in the Activity Bar or pressing `Ctrl+Enter` (Windows/Linux) or `Cmd+Ctrl+I` (macOS) +2. Type `@workspace` followed by your request or prompt to engage Agent Mode +3. Copilot analyzes your workspace and takes appropriate actions + +Example prompts for Agent Mode: +- `Create a python application to calculate fibonacci numbers on my arm machine` +- `@workspace add error handling throughout the application` + +### Edit Mode + +Edit Mode focuses on making targeted changes to your current file or selected code. This mode is useful when you want Copilot to modify existing code without creating new files. + +To use Edit Mode: + +1. Select the code you want to modify in your editor +2. Open the Copilot Chat panel +3. Type your request describing the changes you want + +Alternatively, use the inline chat: + +1. Select the code you want to modify +2. Press `Ctrl+I` (Windows/Linux) or `Cmd+I` (macOS) to open inline chat +3. Describe the changes you want to make +4. Copilot shows a preview of the changes before applying them + +Example prompts for Edit Mode: +- Select a function and ask: `Add input validation and error handling` +- Select a code block and ask: `Optimize this code for better performance on my arm machine` + +### Ask Mode + +Ask Mode is designed for questions and explanations. Use this mode when you want to understand code, learn about concepts, or get guidance without making changes to your files. + +To use Ask Mode: + +1. Open the GitHub Copilot Chat panel +2. Type your question directly without any special prefixes + +Example prompts for Ask Mode: +- `How does this function work?` (with code selected) +- `What are the best practices for error handling in Python?` + +## How do I use MCP Servers with GitHub Copilot? + +Model Context Protocol (MCP) Servers extend GitHub Copilot's capabilities by providing specialized tools and knowledge bases. GitHub Copilot can connect to MCP servers to access domain-specific expertise and functionality. + +The Arm MCP Server provides AI assistants with tools and knowledge for Arm architecture development, migration, and optimization. This is particularly useful when working on Arm-based systems. + +### What tools does the Arm MCP Server provide? + +The Arm MCP Server includes several tools designed for Arm development: + +- migrate-ease scan: Analyzes codebases for x86-specific code that needs updating for Arm compatibility +- skopeo: Inspects container images to check for ARM64 architecture support +- knowledge_base_search: Searches Arm documentation and learning resources +- mca (Machine Code Analyzer): Analyzes assembly code for performance on Arm architectures +- check_image: Verifies Docker image architecture compatibility + +### How do I configure the Arm MCP Server with GitHub Copilot? + +You need Docker running on your system to use the Arm MCP Server. See the [Docker install guide](/install-guides/docker/) for instructions. + +You can configure the MCP server using one of these methods: + +Method 1: Install from GitHub MCP Registry (recommended) + +The easiest way to install the Arm MCP Server is through the GitHub MCP Registry: + +1. 
Visit the [Arm MCP Server registry page](https://github.com/mcp/arm/arm-mcp) +2. Select the **Install MCP Server** button +3. From the dropdown, choose **Install in VSCode** +4. VS Code opens with the Arm MCP Server installation page +5. Select **Install** as you would with other extensions + +![MCP Server Install](/install-guides/_images/mcp-server-install.png "Figure 1. Install Arm MCP Server") + +This method automatically installs the Arm MCP Server and pulls the Docker image. No manual configuration is required. + +Method 2: Workspace configuration (recommended for sharing) + +For manual configuration, you can create a configuration file in your workspace. MCP servers can be configured in two locations: + +- For a specific repository: Create a `.vscode/mcp.json` file in the root of your repository. This enables you to share MCP server configuration with anyone who opens the project. +- For your personal VS Code instance: Add the configuration to your `settings.json` file. This makes the server available in all workspaces. + +{{% notice Note %}} +Use only one location per server to avoid conflicts and unexpected behavior. +{{% /notice %}} + +First, pull the Arm MCP Server image: + +```console +docker pull armlimited/arm-mcp:latest +``` + +Create a `.vscode` directory in your project root if it doesn't exist, then create an `mcp.json` file: + +```console +mkdir -p .vscode +``` + +Add the following configuration to `.vscode/mcp.json`: + +```json +{ + "servers": { + "arm-mcp": { + "type": "stdio", + "command": "docker", + "args": [ + "run", + "--rm", + "-i", + "-v", "/path/to/your/codebase:/workspace", + "armlimited/arm-mcp:latest" + ] + } + } +} +``` + +Method 3: User configuration (available in all workspaces) + +Open the Command Palette (`Ctrl+Shift+P` on Windows/Linux or `Cmd+Shift+P` on macOS) and select **MCP: Open User Configuration**. This opens your user-level `mcp.json` file located at `~/Library/Application Support/Code/User/mcp.json` (macOS) or `%APPDATA%\Code\User\mcp.json` (Windows). + +Add the following configuration to the user-level `mcp.json` file: + +```json +{ + "servers": { + "arm-mcp": { + "type": "stdio", + "command": "docker", + "args": [ + "run", + "--rm", + "-i", + "-v", "/path/to/your/codebase:/workspace", + "armlimited/arm-mcp:latest" + ] + } + } +} +``` + +Save the file. A **Start** button appears at the top of the servers list in your `mcp.json` file. Select **Start** to start the Arm MCP Server. + +### How do I analyze a local codebase with the Arm MCP Server? + +To analyze code in your workspace, mount your local directory to the MCP server's `/workspace` folder using a volume mount. + +Update your `.vscode/mcp.json` configuration to include the volume mount. Replace `/path/to/your/codebase` with the actual path to your project + +For example, if your project is at `/Users/username/myproject`, the volume mount args in your `mcp.json` would be: + +```json +"-v", +"/Users/username/myproject:/workspace", +``` + +### How do I verify the Arm MCP Server is working? + +After saving the `.vscode/mcp.json` file, the **Start** button appears at the top of the servers list. Select **Start** to start the MCP server. + +To confirm the server is running: + +1. Open the Command Palette (`Ctrl+Shift+P` on Windows/Linux or `Cmd+Shift+P` on macOS) +2. Type and select **MCP: List Servers** +3. You should see `arm-mcp` listed as a Running configured server + +Open the GitHub Copilot Chat panel by selecting the chat icon in the Activity Bar. 
In the chat box, select **Agent** from the mode dropdown. + +To view available MCP tools, select the tools icon in the top left corner of the chat box. This opens the MCP server list showing all available tools from the Arm MCP Server. + +![MCP Server Tools](/install-guides/_images/new-mcp-server-tools.png "Figure 2. Tools loaded from the Arm MCP Server") + +You can also ask Copilot to use specific Arm MCP tools: + +``` +Use the Arm MCP Server to scan my codebase for x86-specific code +``` + +or + +``` +Check if the nginx:latest Docker image supports Arm64 +``` + +### Example prompts using the Arm MCP Server + +Here are some example prompts that use the Arm MCP Server tools: + +- `Scan my workspace for code that needs updating for Arm compatibility` +- `Check if the postgres:latest container image supports Arm64 architecture` +- `Search the Arm knowledge base for NEON intrinsics examples` +- `Find learning resources about migrating from x86 to Arm` + +### Troubleshooting MCP Server connections + +If the Arm MCP Server doesn't connect: + +- Verify Docker is running: `docker ps` +- Check that the image was pulled successfully: `docker images | grep arm-mcp` +- Ensure the timeout value (60000ms) is sufficient for your system +- Check VS Code Output panel (select **Output** → **GitHub Copilot Chat**) for error messages +- Restart VS Code after making configuration changes + +If you encounter issues or have questions, reach out to mcpserver@arm.com. + +You're ready to use GitHub Copilot with the Arm MCP Server to enhance your Arm development workflow. diff --git a/content/install-guides/kiro-cli.md b/content/install-guides/kiro-cli.md index 13e9dae4b0..99d8b1a7ba 100644 --- a/content/install-guides/kiro-cli.md +++ b/content/install-guides/kiro-cli.md @@ -106,7 +106,7 @@ kiro-cli version The output shows the version: ```output -kiro-cli 1.20.0 +kiro-cli 1.21.0 ``` ## How can I configure my AWS account to get the most from Kiro CLI? @@ -206,94 +206,62 @@ Use the arrow keys to select the model you want to use. You can ask Kiro to set the default model for future sessions. -## Install an MCP server +## Install a local MCP server -As an example of using MCP with Kiro, you can configure a local GitHub MCP server. +The Arm MCP Server is an MCP server providing AI assistants with tools and knowledge for Arm architecture development, migration, and optimization. This section shows how to configure the Arm MCP server locally using Docker. -Go to your GitHub account developer settings and create a personal access token with the following permissions: +First, pull the MCP server image to your local machine: -- `repo` (Full control of private repositories) -- `read:org` (Read organization membership) -- `read:user` (Read user profile data) +```console +docker pull armlimited/arm-mcp:latest +``` + +You also need Docker running on the system. See the [Docker install guide](/install-guides/docker/) for instructions. -Use an editor to add the content below to the file `$HOME/.kiro/settings/mcp.json`: +### How do I configure the Arm MCP server? + +Modify the file `~/.kiro/settings/mcp.json` to add the Arm MCP server via a Docker container. + +To analyze a local codebase, use a `-v` command to mount a volume to the Arm MCP server `/workspace` folder so it can access code you want to analyze with migrate-ease and other tools. 
+ +Replace the path `/Users/yourname01/yourlocalcodebase` with the path to your local codebase: ```json { "mcpServers": { - "github": { + "arm_mcp_server": { "command": "docker", "args": [ "run", - "-i", "--rm", - "-e", - "GITHUB_PERSONAL_ACCESS_TOKEN", - "ghcr.io/github/github-mcp-server" + "-i", + "-v", "/Users/yourname01/yourlocalcodebase:/workspace", + "--name", "arm-mcp", + "armlimited/arm-mcp:latest" ], - "env": { - "GITHUB_PERSONAL_ACCESS_TOKEN": "" - }, - "disabled": false, - "autoApprove": [] + "env": {}, + "timeout": 60000 } } } ``` -Replace `` with your GitHub personal access token. - -You also need Docker running on the system. See the [Docker install guide](/install-guides/docker/) for instructions. +### How do I verify the Arm MCP server is working? -Restart `kiro-cli` with the new MCP configuration: +Start Kiro CLI chat from your local shell and list the tools from the MCP server to verify it is working: ```console kiro-cli chat ``` -The output shows the GitHub MCP server loaded and running: - -```output -✓ github loaded in 0.16 s -✓ 1 of 1 mcp servers initialized. +Use the `/tools` command to list the available tools: +```console +/tools ``` -You can now use the GitHub MCP server to interact with GitHub repositories and do things like: - -**Repository Management** -- Create new repositories -- Fork existing repositories -- List branches and tags -- Create new branches - -**Code Management** -- Get file contents from repositories -- Create or update files -- Delete files -- Push multiple files in a single commit -- Search code across repositories - -**Pull Requests** -- Create pull requests -- List pull requests -- Get pull request details -- Update pull requests -- Merge pull requests -- Review pull requests -- Request GitHub Copilot reviews -- Get pull request files and comments - -**Issues** -- Create issues -- List issues -- Get issue details -- Update issues -- Add comments to issues -- Search issues - -**Commits** -- List commits -- Get commit details +You should see the Arm MCP server tools listed in the output. If the arm-mcp server says it's still loading, wait a moment and run `/tools` again. + +If you are facing issues or have questions, reach out to mcpserver@arm.com. You're ready to use Kiro CLI. diff --git a/content/install-guides/linux-migration-tools.md b/content/install-guides/linux-migration-tools.md new file mode 100644 index 0000000000..2617ee59b6 --- /dev/null +++ b/content/install-guides/linux-migration-tools.md @@ -0,0 +1,325 @@ +--- +title: Arm Linux Migration Tools + +draft: true + +additional_search_terms: +- migration +- porting +- analysis +- containers +- arm64 +- aarch64 +- sysreport +- skopeo +- llvm-mca +- topdown-tool +- kubearchinspect +- migrate-ease +- aperf +- bolt +- papi +- perf +- processwatch +- check-image +- porting-advisor + +minutes_to_complete: 20 + +author: Jason Andrews + +official_docs: https://github.com/arm/arm-linux-migration-tools + +test_images: +- ubuntu:latest +test_maintenance: true +test_link: null + +layout: installtoolsall +tool_install: true +multi_install: false +multitool_install_part: false +weight: 1 +--- + +[Arm Linux Migration Tools](https://github.com/arm/arm-linux-migration-tools) provides a comprehensive collection of thirteen essential utilities to help you migrate applications from x86 to Arm Linux systems. Instead of installing and managing each tool individually, you get everything you need in a single, streamlined installation. 
+ +This package includes code analysis, performance, and migration tools such as Sysreport, Skopeo, LLVM Machine Code Analyzer, Telemetry Solution, KubeArchInspect, Migrate Ease, Aperf, BOLT, PAPI, Perf, Process Watch, Check Image, and Porting Advisor for Graviton. These tools help you assess application compatibility, analyze performance characteristics, optimize code layout, and identify potential issues before and after migration. + +Whether you're migrating containerized applications, analyzing system performance, or optimizing binaries for Arm processors, this package simplifies your migration workflow by providing all the necessary tools through a unified installation process, saving you time on setup and configuration. + +## What are the tools included in Arm Linux Migration Tools? + +The Arm Linux Migration Tools package includes the following utilities: + +| Tool | Purpose | Test Command | Documentation | +|------|---------|--------------|---------------| +| Sysreport | System analysis and reporting tool for performance and configuration checks | `sysreport --help` | [Get ready for performance analysis with Sysreport](/learning-paths/servers-and-cloud-computing/sysreport/) | +| Skopeo | Container image inspection and manipulation tool | `skopeo --help` | [Skopeo install guide](/install-guides/skopeo/) | +| LLVM Machine Code Analyzer | Machine Code Analyzer for performance analysis of compiled code | `llvm-mca --help` | [Learn about LLVM Machine Code Analyzer](/learning-paths/cross-platform/mca-godbolt/) | +| Telemetry Solution | Performance analysis methodology tool for Linux systems | `topdown-tool --help` | [Telemetry Solution](/install-guides/topdown-tool/) | +| KubeArchInspect | Kubernetes architecture inspection and reporting tool | `kubearchinspect --help` | [Migrate containers to Arm using KubeArchInspect](/learning-paths/servers-and-cloud-computing/kubearchinspect/) | +| Migrate Ease | Migration assistance tool for analyzing and porting workloads | `migrate-ease-cpp --help` | [Migrate applications to Arm servers using migrate-ease](/learning-paths/servers-and-cloud-computing/migrate-ease/) | +| Aperf | Performance monitoring tool for Linux systems | `aperf --help` | [Aperf install guide](/install-guides/aperf/) | +| BOLT | Binary optimization and layout tool (part of LLVM) | `llvm-bolt --help` | [BOLT install guide](/install-guides/bolt/) | +| PAPI | Performance API for accessing hardware performance counters | `papi_avail -h` | [PAPI install guide](/install-guides/papi/) | +| Perf | Linux performance analysis tool for profiling and tracing | `perf --help` | [Perf install guide](/install-guides/perf/) | +| Process Watch | Process monitoring tool for Linux systems | `processwatch -h` | [Run Process watch on your Arm machine](/learning-paths/servers-and-cloud-computing/processwatch/) | +| Check Image | Checks a container image for Arm architecture support | `check-image -h` | [Check Image on learn.arm.com](/learning-paths/cross-platform/docker/check-images/) | +| Porting Advisor for Graviton | Tool to assess portability of software to Arm architecture | `porting-advisor --help` | [Porting Advisor install guide](/install-guides/porting-advisor/) | + +Each tool serves a specific purpose in the migration process, from analyzing system configurations and performance characteristics to optimizing binaries and checking container compatibility. 
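+
+As a quick illustration of the container-compatibility checks these tools enable, once the package is installed you can inspect a public container image with Skopeo. The image name below is only an example:
+
+```bash
+skopeo inspect docker://docker.io/library/nginx:latest
+```
+
+The output includes an `Architecture` field, so on an Arm machine this is a fast way to see whether an image resolves to an `arm64` variant before you migrate a containerized workload.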
+ +## Prerequisites + +Before installing the Arm Linux Migration Tools, verify that your system meets the following requirements: + +### Architecture verification + +This package is designed for Arm Linux systems. Verify that you're running on the Arm architecture: + +```bash +uname -m +``` + +The output should be `aarch64`. If you see `x86_64` or another architecture, this package isn't compatible with your system. + +### Operating system support + +The Arm Linux Migration Tools package supports Ubuntu 22.04 and Ubuntu 24.04. + +### Required dependencies + +Some tools in the package require additional software to function properly: + +Docker or Podman is required for Migrate Ease. You can install Docker with the [Docker install guide](/install-guides/docker/docker-engine). + +Alternatively, if you prefer Podman over Docker, install it with the Ubuntu package manager: + +```bash +sudo apt install -y podman +``` + +## Installation + +The Arm Linux Migration Tools package provides multiple installation methods to suit different preferences and environments. Choose the method that works best for your setup. + +### Option 1: Single-line installation + +The fastest way to install the Arm Linux Migration Tools is with the automated installation script. This method downloads and installs the latest release automatically: + +```bash +curl -sSL https://github.com/arm/arm-linux-migration-tools/releases/download/v3/install.sh | sudo bash +``` + +The command above performs the following actions: +- Downloads the installation script `install.sh` from the GitHub repository +- Automatically detects your system architecture +- Installs required dependencies using the package manager +- Downloads the appropriate release package for your system +- Installs the included tools in `/opt/arm-migration-tools/` +- Creates wrapper scripts in `/usr/local/bin` for easy access +- Configures a Python virtual environment with the required dependencies + +The installation script prompts for your password when `sudo` access is required for copying files to system directories. + +If this is your first install, review the script before running it with `curl | bash`. + +```console +curl -fsSL https://github.com/arm/arm-linux-migration-tools/releases/download/v3/install.sh | more +``` + +This command displays the script so you can verify what it does before executing it. + +### Option 2: Download and install + +For manual installation, download the release tar file for a specific version from GitHub and install it. + +Download the latest release file with `wget`: + +```bash +wget https://github.com/arm/arm-linux-migration-tools/releases/download/v3/arm-migration-tools-v3-arm64.tar.gz +``` + +Extract the downloaded tar file to a temporary directory: + +```bash +tar xvfz arm-migration-tools-v3-arm64.tar.gz +``` + +Navigate to the extracted directory and run the installation script: + +```bash +cd arm-linux-migration-tools +sudo ./scripts/install.sh +``` + +After successful installation, you can remove the downloaded files: + +```bash +cd .. +rm -rf arm-linux-migration-tools arm-migration-tools-v3-arm64.tar.gz +``` + +### Option 3: Build from source + +To build the Arm Linux Migration Tools from source code, clone the repository and build it locally. This method is useful for developers who want to modify the tools or contribute to the project. 
+ +First, clone the GitHub repository to your local system: + +```bash +git clone https://github.com/arm/arm-linux-migration-tools.git +``` + +Change to the cloned repository directory: + +```bash +cd arm-linux-migration-tools +``` + +Run the build script to compile and prepare all tools: + +```console +./scripts/build.sh +``` + +The build script performs the following tasks: +- Downloads and compiles source code for the tools that have source +- Resolves dependencies and builds binaries +- Creates the directory structure for installation +- Prepares a Python virtual environment and installs required packages +- Validates that all tools build successfully + +After a successful build, install the tools to your system: + +```console +sudo ./scripts/install.sh +``` + +The install script: +- Installs the locally built tools to `/opt/arm-migration-tools/` +- Creates wrapper scripts in `/usr/local/bin` for easy access +- Sets up the Python virtual environment with the built dependencies +- Configures permissions for all installed components + +After successful installation, you can clean up the build directory: + +```bash +cd .. +rm -rf arm-linux-migration-tools +``` + +## Installation locations + +After successful installation, the Arm Linux Migration Tools are organized in a structured directory layout that separates the core tools, dependencies, and user-accessible commands. Understanding this structure helps you troubleshoot issues and manage the installation effectively. + +All tools and their dependencies are installed in the `/opt/arm-migration-tools/` directory. Review the details of the software installed there. + +For easy access from anywhere in your system, wrapper scripts are also installed in `/usr/local/bin/`, making it easier to invoke each tool. View the available executables there. + +### Python virtual environment + +A Python virtual environment is located at `/opt/arm-migration-tools/venv/` and contains all Python packages required by the migration tools. + +The wrapper scripts in `/usr/local/bin` use this environment automatically, but you can activate it with: + +```bash +source /opt/arm-migration-tools/venv/bin/activate +``` + +You can view the installed Python packages in the virtual environment: + +```bash +/opt/arm-migration-tools/venv/bin/pip list +``` + +The output is similar to: + +```output +Package Version Editable project location +------------------------- ---------- -------------------------------------------------------------- +altgraph 0.17.3 +annotated-types 0.7.0 +attrs 25.4.0 +blinker 1.9.0 +certifi 2025.11.12 +charset-normalizer 3.4.4 +click 8.3.1 +Flask 3.1.2 +gitdb 4.0.12 +GitPython 3.1.45 +idna 3.11 +itsdangerous 2.2.0 +Jinja2 3.1.4 +jsonschema 4.25.1 +jsonschema-specifications 2025.9.1 +lxml 5.3.2 +markdown-it-py 4.0.0 +MarkupSafe 2.1.3 +mdurl 0.1.2 +packaging 23.1 +pip 25.3 +progressbar33 2.4 +pycryptodome 3.19.1 +pydantic 2.11.7 +pydantic_core 2.33.2 +Pygments 2.19.2 +pyparsing 3.1.1 +python-magic 0.4.27 +referencing 0.37.0 +requests 2.32.5 +rich 14.1.0 +rpds-py 0.30.0 +ruamel.yaml 0.18.16 +ruamel.yaml.clib 0.2.15 +smmap 5.0.2 +topdown-tool 1.0.0 /opt/arm-migration-tools/telemetry-solution/tools/topdown_tool +typing_extensions 4.15.0 +typing-inspection 0.4.2 +urllib3 2.6.2 +uuid 1.30 +Werkzeug 3.1.4 +XlsxWriter 3.1.2 +``` + +## Verify installation + +After installing the Arm Linux Migration Tools, verify that all tools are working correctly. The package includes a comprehensive test script to validate your installation. 
+ +Execute the included test script to verify all 13 tools are properly installed and functional: + +```bash +/opt/arm-migration-tools/scripts/arm-migration-tools-test.sh +``` + +The script performs a basic invocation of each tool to confirm it runs. + +{{% notice Note %}} +If you get an error on Topdown Tool you need to run the commands below. +```console +sudo sh -c "echo -1 > /proc/sys/kernel/perf_event_paranoid" +sudo sh -c "echo 0 > /proc/sys/kernel/kptr_restrict" +``` +More information about the options is in the [Linux Perf install guide](/install-guides/perf/). +{{% /notice %}} + +### Next steps + +Now that you have successfully installed and verified the Arm Linux Migration Tools, you're ready to begin. + +You can review the [Arm Migration overview](/migration/) for additional guidance. + +## Uninstall + +If you need to remove the Arm Linux Migration Tools from your system, use the included uninstall script. This process completely removes all installed components and restores your system to its previous state. + +To uninstall the Arm Linux Migration Tools package, run the uninstall script: + +```bash +sudo /opt/arm-migration-tools/scripts/uninstall.sh +``` + +The uninstall script performs the following tasks: +- Removes all tools from `/opt/arm-migration-tools/` directory +- Deletes all wrapper scripts from `/usr/local/bin/` +- Removes the Python virtual environment and all installed dependencies diff --git a/content/learning-paths/automotive/_index.md b/content/learning-paths/automotive/_index.md index 996e85ca16..7309a26232 100644 --- a/content/learning-paths/automotive/_index.md +++ b/content/learning-paths/automotive/_index.md @@ -11,12 +11,13 @@ subtitle: Build secure, connected, smart IoT devices title: Automotive weight: 4 subjects_filter: -- Containers and Virtualization: 3 +- Containers and Virtualization: 4 - Performance and Architecture: 6 operatingsystems_filter: - Baremetal: 1 -- Linux: 8 +- Linux: 9 - macOS: 1 +- other: 1 - RTOS: 1 tools_software_languages_filter: - Arm Development Studio: 1 @@ -33,6 +34,7 @@ tools_software_languages_filter: - Raspberry Pi: 1 - ROS 2: 3 - Rust: 1 +- Tinkerblox: 1 - topdown-tool: 1 - Zenoh: 1 --- diff --git a/content/learning-paths/automotive/tinkerblox_ultraedge/_index.md b/content/learning-paths/automotive/tinkerblox_ultraedge/_index.md new file mode 100644 index 0000000000..aad23be2d9 --- /dev/null +++ b/content/learning-paths/automotive/tinkerblox_ultraedge/_index.md @@ -0,0 +1,56 @@ +--- +title: UltraEdge HPC-I execution fabric for AI & mixed workloads + +draft: true +cascade: + draft: true + +minutes_to_complete: 60 + +who_is_this_for: Business teams targeting lower TCO of computing infrastructure through higher utilization of CPU and/or CPU-GPU install bases. R&D and Engineering teams looking for most efficient use of CPU/CPU-GPU infrastructure. Innovation teams looking to maximize edge resources to host new-age AI on constrained environments. Development teams looking at alternative packaging tech – run-time environments to build next generation workloads. + +learning_objectives: + - Understand the layered architecture of UltraEdge **core**, **boost**, and **prime**. + - Build applications using the **UltraEdge MicroStack** + - Deploy the MicroPacs on **Linux-based compute systems** and scale to + cloud or data-centre environments. + - Optimize performance for **edge-cloud scenarios**, enabling near + real-time data flows. + + +prerequisites: + - Experience using **Linux** on embedded or SBC platforms. 
+ - Understanding of **container runtimes** (containerd) and **CNI + networking**. + - Basic knowledge of **communication protocols** (MQTT, HTTP, etc.). + - (Optional but helpful) dFamiliarity with **edge-cloud + architectures** and **data-flow orchestration**. + +author: Tinkerblox + +### Tags +skilllevels: Advanced +subjects: Containers and Virtualization +tools_software_languages: + - Tinkerblox + +armips: + - Neoverse + +operatingsystems: + - Linux + - other + +further_reading: + - resource: + title: Tinkerblox + link: https://tinkerblox.io + type: website + + +### FIXED, DO NOT MODIFY +# ================================================================================ +weight: 1 # _index.md always has weight of 1 to order correctly +layout: "learningpathall" # All files under learning paths have this same wrapper +learning_path_main_page: "yes" # This should be surfaced when looking for related content. Only set for _index.md of learning path content. +--- diff --git a/content/learning-paths/automotive/tinkerblox_ultraedge/_next-steps.md b/content/learning-paths/automotive/tinkerblox_ultraedge/_next-steps.md new file mode 100644 index 0000000000..c3db0de5a2 --- /dev/null +++ b/content/learning-paths/automotive/tinkerblox_ultraedge/_next-steps.md @@ -0,0 +1,8 @@ +--- +# ================================================================================ +# FIXED, DO NOT MODIFY THIS FILE +# ================================================================================ +weight: 21 # Set to always be larger than the content in this path to be at the end of the navigation. +title: "Next Steps" # Always the same, html page title. +layout: "learningpathall" # All files under learning paths have this same wrapper for Hugo processing. +--- diff --git a/content/learning-paths/automotive/tinkerblox_ultraedge/background.md b/content/learning-paths/automotive/tinkerblox_ultraedge/background.md new file mode 100644 index 0000000000..ecc1e54fec --- /dev/null +++ b/content/learning-paths/automotive/tinkerblox_ultraedge/background.md @@ -0,0 +1,55 @@ +--- +title: UltraEdge HPC-I execution fabric for AI & mixed workloads + +weight: 2 + +layout: "learningpathall" +--- + +{{% notice Note %}} +REMOVE ME: Need to review content for Intro/background... +{{% /notice %}} + +### Overview + +UltraEdge was built with the vision of orchestrating the edge-native +execution fabric for high-performance compute infrastructure + +- UltraEdge is a ‘built-for-edge’ adaptive **AI & Mixed Workloads** + execution stack built on the ethos of high performance, high + fungibility & ultra-low footprint +- Developed through strategic alliances with world-renowned technology + powerhouses +- Clear dual focus on Mixed workloads and new-age AI workloads +- Full stack enablement through MicroStack & NeuroStack systems +- Curated for AI@Edge with preferred edge deployment approach by Edge + AI Foundation +- Managed cluster” orchestration through integration with Kube-stack + and/or Slurm +- Observability for control plane, diagnostics & telemetry +- Demonstrable value to customer through lower TCO of CPU-GPU clusters + +### UltraEdge High-Level Architecture + +{{% notice Note %}} +REMOVE ME: It would be good to put a high-level picture of the architecture here. Then text below can detail the high points. +{{% /notice %}} + +**UltraEdge ‘Core’ Layer
**
+Handles compute infrastructure management, including service orchestration, lifecycle management, rule engine orchestration, and data-flow management.
+
+**UltraEdge ‘Boost’ Layer**
+Implements performance-critical routines and FFI (Foreign Function Interface) calls; contains dynamic connectors and southbound protocol adapters.
+
+**UltraEdge ‘Prime’ Layer**
+Contains business logic, trigger & activation sequences, and AI & mixed workload orchestration.
+
+**UltraEdge Edge-Cloud ‘Connect’ Layer
** +Supports data streaming to databases (InfluxDB, SQLite) and provides +diagnostic/logging outputs . **UltraEdge Dock** Supports workload orchestration +management through kube-stack or slurm. diff --git a/content/learning-paths/automotive/tinkerblox_ultraedge/debian_installation.md b/content/learning-paths/automotive/tinkerblox_ultraedge/debian_installation.md new file mode 100644 index 0000000000..5a346dc8b8 --- /dev/null +++ b/content/learning-paths/automotive/tinkerblox_ultraedge/debian_installation.md @@ -0,0 +1,180 @@ +--- +title: DEBIAN Installation - UltraEdge + +weight: 4 + +layout: "learningpathall" +--- + +#### Installation Process + +{{% notice Note %}} +REMOVE ME: Need link information to "Uncloud" below... +{{% /notice %}} + +- Copy device installation details from **Uncloud**. +- Device Initialization + + 1. Copy the command below into the clipboard. + 2. Open terminal on your device. + 3. Paste the copied command into terminal to initialize the device. + + + {{% notice Note %}} +REMOVE ME: Not sure what "example code" means below... is this what the user needs to execute or is it just an example? +{{% /notice %}} + + Just an example code. You will find the exact to execute for your device in unclound + ```bash + sudo apt update && sudo apt install curl && sudo apt install jq -y && sudo DEVICE_ID="5b3ff290-0c88-4cd9-8ef7-08de0bded9df" KEY="TB.ApiKey-mlBZgDFc7qyM6ztPjILBCbFEqnVlbvjUpM1Q1IqNP6tA7wNdi97AQ==" sh -c "$(curl "https://tinkerbloxdev.blob.core.windows.net:443/tinkerbloxdev/binaries/installer.sh?sv=2025-01-05&st=2025-11-03T06%3A31%3A55Z&se=2025-11-03T06%3A56%3A55Z&sr=b&sp=r&sig=HNS70HgJyHlhCVQrqvpGdCcaf8%2FtVjdW4RNiiiIPCSUA%3D")" + ``` + +- Paste the copied content in the target terminal and execute. + +#### Activation of Agent + +On the first boot, the agent will automatically generate a file named +`activation_key.json` at the path: + + /opt/tinkerblox/activation_key.json + +Share this `activation_key.json` file with the TinkerBlox team to +receive license key (which includes license metadata). + +1. Stop the agent using the following command: + + sudo systemctl stop ultraedge.service + +2. Replace the existing `activation_key.json` file in + `/opt/tinkerblox/` with the licensed one provided by TinkerBlox. + +3. Start the agent: + + sudo systemctl start ultraedge.service + +#### Manual Running + +- Binary path: `/usr/bin/EdgeBloXagent` + +- To start: + + EdgeBloXagent + +- To stop, press Ctrl + + C once. + +## MicroPac Installation + +{{% notice Note %}} +REMOVE ME: Is MicroPac only for Debian installations? Not for YOCTO ones? +{{% /notice %}} + +#### System Requirements + +- Linux host (aarch64) +- Sudo permissions +- Overlay filesystem support +- Internet connection + +#### Required Packages + + sudo apt-get update + sudo apt-get install -y tar curl qemu-user-static binfmt-support + +### Cross-Architecture Support + +{{% notice Note %}} +REMOVE ME: Might need a bit more detail on why this needs to be executed (below): +{{% /notice %}} + +To build MicroPac for different architectures: + # Enable binfmt for armv7 + sudo update-binfmts --enable qemu-armv7 + +### Installation + +- The package is provided as a `.deb` file. + +- Install it on your host machine: + + sudo apt install ./.deb + +### MicroPac File Schema file creation/setup + +{{% notice Note %}} +REMOVE ME: Need more information on how to setup your project directory/where its located +{{% /notice %}} + +Place a `MicroPacFile` in your project directory. 
+ +```console + name: nginx + version: 1.0.0.0 + target: aarch64 + applicationType: custom + image: Alpine:3.21 + createdBy: developer@tinkerblox.io + description: Nginx web server microservice + + buildSteps: + # Install nginx and create necessary directories + - run: apk add --no-cache nginx + - run: mkdir -p /var/www/html /var/log/nginx /var/lib/nginx /var/tmp/nginx + + # Copy configuration files + - copy: [nginx.conf, /etc/nginx/nginx.conf] + - copy: [index.html, /var/www/html/index.html] + - copy: [404.html, /var/www/html/404.html] + + # Copy startup script + - workdir: /app + - copy: [nginx_start.sh, .] + - run: chmod +x ./nginx_start.sh + + # Set proper permissions + #- run: chown -R nginx:nginx /var/www/html /var/log/nginx /var/lib/nginx /var/tmp/nginx + + entry: /app/nginx_start.sh + mode: continuous-run + + env: + NGINX_PORT: 8080 + APP_ENV: production + + network: + mode: host + name: nginx-net +``` +### Configuration Fields + +#### Required Fields + +- **name**: Application name (≤ 10 characters) +- **version**: Application version +- **target**: Target architecture +- **applicationType**: Application type (python, binary, custom) +- **image**: Base image +- **entry**: Entry point command +- **mode**: single-run + +#### Optional Fields + +- **env**: Environment variable +- **buildSteps**: Array of build instructions +- **limits**: Resource limits (memory, cpu) +- **mount**: Volume mount points +- **network**: Network configuration +- **createdBy**: maintanier of the application +- **description**: description of the application + +### Building the MicroPac + +Navigate to your project directory and execute: + + sudo micropac-builder build + +This generates a file named `.mpac`. + +{{% notice Note %}} +REMOVE ME: Is there a way to confirm that Micropac is properly setup now? +{{% /notice %}} diff --git a/content/learning-paths/automotive/tinkerblox_ultraedge/main.md b/content/learning-paths/automotive/tinkerblox_ultraedge/main.md new file mode 100644 index 0000000000..04913ef6b8 --- /dev/null +++ b/content/learning-paths/automotive/tinkerblox_ultraedge/main.md @@ -0,0 +1,464 @@ +
+UltraEdge Middleware + +Updated: 2025-11-01 Skill: +Intermediate → Advanced + +Contents + +1. [About](#1-about-this-learning-path--project) +2. [Who is this for?](#2-who-is-this-for) +3. [What will you learn?](#3-what-will-you-learn) +4. [Prerequisites](#4-prerequisites) +5. [Overview & Architecture](#5-overview--architecture) +6. [Installation & Setup](#6-installation--setup) +7. [MicroPac](#7-micropac) +8. [Tinkerblox CLI](#8-tinkerblox-cli-usage-guide) +9. [Troubleshooting](#9-troubleshooting) + +# UltraEdge High-Performance Compute Infrastructure + +Learning Path / User Guide • Reading time: ~20 min + +## 1. About this Learning Path / Project + +UltraEdge HPC-I is the execution fabric for AI & mixed workloads for +new-age compute infrastructure – automotive, smart products & technology +infrastructure industries. + +UltraEdge forms the recommended edge architecture orchestration for +SOAFEE standard for software defined vehicles \[SDVs\]. UltraEdge +transforms HPC workload management with reduced workload package size, +lower resource utilization footprint and architecture-redefining +workload startup. + +Smart product OEMs leverage UltraEdge to shorten time-to-market for new +products and to ‘AIoT-ize’ its existing install base of legacy products +with Plug-n-Play additions. + +For technology infrastructure industry \[especially data centers\], +UltraEdge is pathbreaking in its ability to unlock under-utilized +compute power with the double whammy of higher performance at lower TCO. + +## 2. Who is this for? + +- Business teams targeting lower TCO of computing infrastructure + through higher utilization of CPU and/or CPU-GPU install bases . +- R&D and Engineering teams looking for most efficient use of + CPU/CPU-GPU infrastructure . +- Innovation teams looking to maximize edge resources to host new-age + AI on constrained environments. +- Development teams looking at alternative packaging tech – run-time + environments to build next generation workloads. + +## 3. What will you learn? + +By the end of this guide, you will be able to: + +- Understand the layered architecture of UltraEdge: **core**, + **boost**, and **prime**. +- Build applications using the **UltraEdge MicroStack** +- Deploy the MicroPacs on **Linux-based compute systems** and scale to + cloud or data-centre environments. +- Optimize performance for **edge-cloud scenarios**, enabling near + real-time data flows. + +## 4. Prerequisites + +Before starting, ensure you have: + +- Experience using **Linux** on embedded or SBC platforms. +- Understanding of **container runtimes** (containerd) and **CNI + networking**. +- Basic knowledge of **communication protocols** (MQTT, HTTP, etc.). +- *(Optional but helpful)* Familiarity with **edge-cloud + architectures** and **data-flow orchestration**. + +## 5. 
Overview & Architecture + +## 5.1 Overview + +UltraEdge was built with the vision of orchestrating the edge-native +execution fabric for high-performance compute infrastructure. + +- UltraEdge is a ‘built-for-edge’ adaptive **AI & Mixed Workloads** + execution stack built on the ethos of high performance, high + fungibility & ultra-low footprint +- Developed through strategic alliances with world-renowned technology + powerhouses +- Clear dual focus on Mixed workloads and new-age AI workloads +- Full stack enablement through MicroStack & NeuroStack systems +- Curated for AI@Edge with preferred edge deployment approach by Edge + AI Foundation +- ‘Managed cluster’ orchestration through integration with Kube-stack + and/or Slurm +- Observability for control plane, diagnostics & telemetry +- Demonstrable value to customers through lower TCO of CPU-GPU clusters + +### 5.2 High-Level Architecture + +**UltraEdge ‘Core’ Layer
** +Handles compute infrastructure management including service +orchestration, lifecycle management, rule engine orchestration, and +data-flow management. + +**UltraEdge ‘Boost’ Layer
** +Implements performance-critical routines and FFI (Foreign Function +Interface) calls; contains dynamic connectors and southbound protocol +adapters. + +**UltraEdge ‘Prime’ Layer
** +Contains business logic, trigger & activation sequences, and AI & mixed +workload orchestration. + +**UltraEdge Edge-Cloud ‘Connect’ Layer
** +Supports data streaming to databases (InfluxDB, SQLite) and provides +diagnostic/logging outputs . + +**UltraEdge Dock** +Supports workload orchestration management through kube-stack or slurm. +. + +## 6. Installation & Setup + +### 6.1 System Requirements + +- Linux host (Aarch64 (arm64),armv7) + +------------------------------------------------------------------------ + +### 6.2 Yocto + +#### 6.2.1 Build Instructions + +1. Copy the `meta-tinkerblox` folder to your Yocto build environment. + +2. Add the layer: + + bitbake-layers add-layer + +3. Build and flash the firmware to the target hardware. + +#### 6.2.2 Activation of Agent + +On the first boot, the agent will automatically generate a file named +`activation_key.json` at the path: + + /opt/tinkerblox/activation_key.json + +Share this `activation_key.json` file with the TinkerBlox team to +receive license key (which includes license metadata). + +1. Stop the agent using the following command: + + sudo systemctl stop ultraedge.service + +2. Replace the existing `activation_key.json` file in + `/opt/tinkerblox/` with the licensed one provided by TinkerBlox. + +3. Start the agent: + + sudo systemctl start ultraedge.service + +#### 6.2.3 Manual Running + +- Binary path: `/opt/tinkerblox/Ultraedge/EdgeBloXagent` + +- To start: + + cd /opt/tinkerblox/Ultraedge + ./EdgeBloXagent + +- To stop, press Ctrl + + C once. + +------------------------------------------------------------------------ + +### 6.3 Other Distributions + +#### 6.3.1 Installation Process + +- Copy device installation details from **Uncloud**. +- Device Initialization + + 1. Copy the command below into the clipboard. + 2. Open terminal on your device. + 3. Paste the copied command into terminal to initialize the device. + + Just an example code. You will find the exact to execute for your device in unclound + ```bash + sudo apt update && sudo apt install curl && sudo apt install jq -y && sudo DEVICE_ID="5b3ff290-0c88-4cd9-8ef7-08de0bded9df" KEY="TB.ApiKey-mlBZgDFc7qyM6ztPjILBCbFEqnVlbvjUpM1Q1IqNP6tA7wNdi97AQ==" sh -c "$(curl "https://tinkerbloxdev.blob.core.windows.net:443/tinkerbloxdev/binaries/installer.sh?sv=2025-01-05&st=2025-11-03T06%3A31%3A55Z&se=2025-11-03T06%3A56%3A55Z&sr=b&sp=r&sig=HNS70HgJyHlhCVQrqvpGdCcaf8%2FtVjdW4RNiiiIPCSUA%3D")" + ``` + +- Paste the copied content in the target terminal and execute. + +#### 6.3.2 Activation of Agent + +On the first boot, the agent will automatically generate a file named +`activation_key.json` at the path: + + /opt/tinkerblox/activation_key.json + +Share this `activation_key.json` file with the TinkerBlox team to +receive license key (which includes license metadata). + +1. Stop the agent using the following command: + + sudo systemctl stop ultraedge.service + +2. Replace the existing `activation_key.json` file in + `/opt/tinkerblox/` with the licensed one provided by TinkerBlox. + +3. Start the agent: + + sudo systemctl start ultraedge.service + +#### 6.3.3 Manual Running + +- Binary path: `/usr/bin/EdgeBloXagent` + +- To start: + + EdgeBloXagent + +- To stop, press Ctrl + + C once. + +## 7. 
MicroPac + +### 7.1 Prerequisites + +#### 7.1.1 System Requirements + +- Linux host (aarch64) +- Sudo permissions +- Overlay filesystem support +- Internet connection + +#### 7.1.2 Required Packages + + sudo apt-get update + sudo apt-get install -y tar curl qemu-user-static binfmt-support + +------------------------------------------------------------------------ + +### Cross-Architecture Support + +To build MicroPac for different architectures: + + + # Enable binfmt for armv7 + sudo update-binfmts --enable qemu-armv7 + +------------------------------------------------------------------------ + +### 7.2 Installation + +- The package is provided as a `.deb` file. + +- Install it on your host machine: + + sudo apt install ./.deb + +### 7.3 MicroPac File Schema + +Place a `MicroPacFile` in your project directory. + +**Example Schema:** + + name: nginx + version: 1.0.0.0 + target: aarch64 + applicationType: custom + image: Alpine:3.21 + createdBy: developer@tinkerblox.io + description: Nginx web server microservice + + buildSteps: + # Install nginx and create necessary directories + - run: apk add --no-cache nginx + - run: mkdir -p /var/www/html /var/log/nginx /var/lib/nginx /var/tmp/nginx + + # Copy configuration files + - copy: [nginx.conf, /etc/nginx/nginx.conf] + - copy: [index.html, /var/www/html/index.html] + - copy: [404.html, /var/www/html/404.html] + + # Copy startup script + - workdir: /app + - copy: [nginx_start.sh, .] + - run: chmod +x ./nginx_start.sh + + # Set proper permissions + #- run: chown -R nginx:nginx /var/www/html /var/log/nginx /var/lib/nginx /var/tmp/nginx + + entry: /app/nginx_start.sh + mode: continuous-run + + env: + NGINX_PORT: 8080 + APP_ENV: production + + network: + mode: host + name: nginx-net + +### 7.3 Configuration Fields + +#### Required Fields + +- **name**: Application name (≤ 10 characters) +- **version**: Application version +- **target**: Target architecture +- **applicationType**: Application type (python, binary, custom) +- **image**: Base image +- **entry**: Entry point command +- **mode**: single-run + +#### Optional Fields + +- **env**: Environment variable +- **buildSteps**: Array of build instructions +- **limits**: Resource limits (memory, cpu) +- **mount**: Volume mount points +- **network**: Network configuration +- **createdBy**: maintanier of the application +- **description**: description of the application + +### 7.4 Building the MicroPac + +Navigate to your project directory and execute: + + sudo micropac-builder build + +This generates a file named `.mpac`. + +## 8. Tinkerblox CLI Usage Guide + +Tinkerblox Command Line Interface for managing the Edge Agent and +microservices. + +**Usage:** + + tinkerblox-cli [OPTIONS] + +**Commands:** + +- `status` — Show connection status with the Edge Agent +- `microboost` — Microservice management commands +- `help` — Print this message or the help of the given subcommand(s) + +**Options:** + +- `-h`, `--help` — Print help +- `-V`, `--version` — Print version + +### Usage + +#### 1. Check CLI Connection Status + + sudo tinkerblox-cli status + +*Displays whether the CLI is connected to the Edge Agent.* + +#### 2. Microservice Management + +Manage microservices running on the Edge platform. + +**Syntax:** + + sudo tinkerblox-cli microboost [options] + +##### Available Commands + +* **install** + Installs a microservice. You must provide the path to the MPAC file as an argument. + + sudo tinkerblox-cli microboost install -f /path/to/your.mpac + +* **list** + Lists all installed microservices. 
+ + sudo tinkerblox-cli microboost list + +* **status ** + Shows statistics (CPU, memory, status, etc.) for the specified microservice. + + sudo tinkerblox-cli microboost status + +* **stop ** + Stops the microservice with the specified ID. + + sudo tinkerblox-cli microboost stop + +* **start ** + Starts the microservice with the specified ID (must be stopped). + + sudo tinkerblox-cli microboost start + +* **uninstall ** + Uninstalls the microservice with the specified ID. + + sudo tinkerblox-cli microboost uninstall + +#### 3. Diagnostics Management + +Run diagnostics on the Edge platform. + +**Syntax:** + + sudo tinkerblox-cli diagnostics + +**full** +Run complete system diagnostics and summarize results + + sudo tinkerblox-cli diagnostics full + +**system** +Check CPU, memory, and OS-level health + + sudo tinkerblox-cli diagnostics system + +**network** +Verify network connectivity and endpoint reachability + + sudo tinkerblox-cli diagnostics network + +**filesystem** +Validate database/filesystem connectivity and integrity + + sudo tinkerblox-cli diagnostics filesystem + +**engine** +Check engine microboost neuroboost + + sudo tinkerblox-cli diagnostics engine + +## 9. Troubleshooting + +**Permission Denied** + +- Ensure `sudo` privileges. +- Check directory ownership and permissions. +- Verify overlay filesystem support. + +**Directory Creation Failed** + +- Check disk space. +- Verify parent directory permissions. +- Ensure the path is valid. + +**Cross-Architecture Build Issues** + +- Verify QEMU installation: + + qemu-aarch64-static --version + +- Check binfmt registration: + + ls /proc/sys/fs/binfmt_misc/ + +- Ensure the target architecture is enabled. + +- If issues persist, change the host architecture. +
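Before changing the host architecture, it is worth confirming the basic pipeline end to end. The following is a suggested sanity check assembled from the commands earlier in this guide; it assumes the agent logs through systemd, and `nginx.mpac` stands in for whatever `.mpac` file your build produced:

```bash
# Confirm the agent service is running and look at its recent log output
sudo systemctl status ultraedge.service
sudo journalctl -u ultraedge.service --no-pager | tail -n 20

# Confirm the CLI can reach the Edge Agent
sudo tinkerblox-cli status

# Install a freshly built MicroPac and confirm it appears in the list
sudo tinkerblox-cli microboost install -f ./nginx.mpac
sudo tinkerblox-cli microboost list
```

If `status` reports no connection to the Edge Agent, restart the service and re-run the check before retrying the MicroPac steps.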
\ No newline at end of file diff --git a/content/learning-paths/automotive/tinkerblox_ultraedge/running.md b/content/learning-paths/automotive/tinkerblox_ultraedge/running.md new file mode 100644 index 0000000000..d7ba891f7f --- /dev/null +++ b/content/learning-paths/automotive/tinkerblox_ultraedge/running.md @@ -0,0 +1,148 @@ +--- +title: RUNNING Setup/Installation - UltraEdge HPC-I execution fabric for AI & mixed workloads + +weight: 5 + +layout: "learningpathall" +--- + +{{% notice Note %}} +REMOVE ME: We probably need to come up with an actual "exercise" here to exercise tinkerblox... below is only a reference to the CLI commands... what do we want the user to explore/experiment and see with tinkerblox? +{{% /notice %}} + +### Tinkerblox CLI Usage Guide + +Tinkerblox Command Line Interface for managing the Edge Agent and +microservices. + +**Usage:** + + tinkerblox-cli [OPTIONS] + +**Commands:** + +- `status` — Show connection status with the Edge Agent +- `microboost` — Microservice management commands +- `help` — Print this message or the help of the given subcommand(s) + +**Options:** + +- `-h`, `--help` — Print help +- `-V`, `--version` — Print version + +### Usage + +#### Check CLI Connection Status + + sudo tinkerblox-cli status + +*Displays whether the CLI is connected to the Edge Agent.* + +#### Microservice Management + +Manage microservices running on the Edge platform. + +**Syntax:** + + sudo tinkerblox-cli microboost [options] + +##### Available Commands + +* **install** + Installs a microservice. You must provide the path to the MPAC file as an argument. + + sudo tinkerblox-cli microboost install -f /path/to/your.mpac + +* **list** + Lists all installed microservices. + + sudo tinkerblox-cli microboost list + +* **status ** + Shows statistics (CPU, memory, status, etc.) for the specified microservice. + + sudo tinkerblox-cli microboost status + +* **stop ** + Stops the microservice with the specified ID. + + sudo tinkerblox-cli microboost stop + +* **start ** + Starts the microservice with the specified ID (must be stopped). + + sudo tinkerblox-cli microboost start + +* **uninstall ** + Uninstalls the microservice with the specified ID. + + sudo tinkerblox-cli microboost uninstall + +#### Diagnostics Management + +Run diagnostics on the Edge platform. + +**Syntax:** + + sudo tinkerblox-cli diagnostics + +**full** +Run complete system diagnostics and summarize results + + sudo tinkerblox-cli diagnostics full + +**system** +Check CPU, memory, and OS-level health + + sudo tinkerblox-cli diagnostics system + +**network** +Verify network connectivity and endpoint reachability + + sudo tinkerblox-cli diagnostics network + +**filesystem** +Validate database/filesystem connectivity and integrity + + sudo tinkerblox-cli diagnostics filesystem + +**engine** +Check engine microboost neuroboost + + sudo tinkerblox-cli diagnostics engine + +## Troubleshooting + +{{% notice Note %}} +REMOVE ME: We probably need to outline more where the errors might be seen while running a specific task/exercise... +{{% /notice %}} + +**Permission Denied** + +- Ensure `sudo` privileges. +- Check directory ownership and permissions. +- Verify overlay filesystem support. + +**Directory Creation Failed** + +- Check disk space. +- Verify parent directory permissions. +- Ensure the path is valid. + +**Cross-Architecture Build Issues** + +{{% notice Note %}} +REMOVE ME: are there any tel-tale signs that my issue is a cross-architecture build issue? Is this a Yocto build issue? 
+{{% /notice %}} + +- Verify QEMU installation: + + qemu-aarch64-static --version + +- Check binfmt registration: + + ls /proc/sys/fs/binfmt_misc/ + +- Ensure the target architecture is enabled. + +- If issues persist, change the host architecture. \ No newline at end of file diff --git a/content/learning-paths/automotive/tinkerblox_ultraedge/yocto_installation.md b/content/learning-paths/automotive/tinkerblox_ultraedge/yocto_installation.md new file mode 100644 index 0000000000..8bee964870 --- /dev/null +++ b/content/learning-paths/automotive/tinkerblox_ultraedge/yocto_installation.md @@ -0,0 +1,71 @@ +--- +title: YOCTO Installation - UltraEdge + +weight: 3 + +layout: "learningpathall" +--- + +### System Requirements + +- Linux host (Aarch64 (arm64),armv7) + +{{% notice Note %}} +REMOVE ME: We need alot more detail here for YOCTO builds... what are the supported boards/etc... +{{% /notice %}} + +#### YOCTO Build Instructions + +{{% notice Note %}} +REMOVE ME: We need alot more detail here for YOCTO builds... where to pull images/BSPs/etc... how to configure the layers/packages/etc... +{{% /notice %}} + +1. Copy the `meta-tinkerblox` folder to your Yocto build environment. + +2. Add the layer: + + bitbake-layers add-layer + + +{{% notice Note %}} +REMOVE ME: We need alot more detail here for YOCTO builds... what does the build process look like. Where are the build completion artifacts. How to flash to the specific board/etc... +{{% /notice %}} + +3. Build and flash the firmware to the target hardware. + +#### Activation of Agent + +On the first boot, the agent will automatically generate a file named +`activation_key.json` at the path: + + /opt/tinkerblox/activation_key.json + +{{% notice Note %}} +REMOVE ME: need contact information to the Tinkerblox team below... +{{% /notice %}} + +Share this `activation_key.json` file with the TinkerBlox team to +receive license key (which includes license metadata). + +1. Stop the agent using the following command: + + sudo systemctl stop ultraedge.service + +2. Replace the existing `activation_key.json` file in + `/opt/tinkerblox/` with the licensed one provided by TinkerBlox. + +3. Start the agent: + + sudo systemctl start ultraedge.service + +#### Manual Running + +- Binary path: `/opt/tinkerblox/Ultraedge/EdgeBloXagent` + +- To start: + + cd /opt/tinkerblox/Ultraedge + ./EdgeBloXagent + +- To stop, press Ctrl + + C once. \ No newline at end of file diff --git a/content/learning-paths/cross-platform/ernie_moe_v9/1_mixture_of_experts.md b/content/learning-paths/cross-platform/ernie_moe_v9/1_mixture_of_experts.md new file mode 100644 index 0000000000..db5d101356 --- /dev/null +++ b/content/learning-paths/cross-platform/ernie_moe_v9/1_mixture_of_experts.md @@ -0,0 +1,60 @@ +--- +title: Why MoE Models Let Edge Devices Run 21B LLMs +weight: 2 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## What is Mixture of Experts (MoE)? + +As large language models grow to tens of billions of parameters, traditional dense networks — which activate all weights for every input — become infeasible for edge deployment, especially on CPU-only Arm devices. [Mixture of Experts (MoE)](https://en.wikipedia.org/wiki/Mixture_of_experts) offers a breakthrough. + +This is simple and uniform, but as model sizes increase—into the billions of parameters—this structure becomes both memory-intensive and compute-intensive. For edge environments like mobile devices, embedded systems, this makes deploying large models nearly impossible. 
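A rough back-of-envelope calculation makes the problem concrete. The bytes-per-parameter figures below are approximations (real builds add metadata and activation buffers), but they show why a dense 21B-parameter model is out of reach for most edge boards and why 4-bit quantization is needed at minimum:

```bash
# Approximate weight storage for a 21B-parameter model, in GB
echo "FP16, 2 bytes per parameter:  $(echo '21 * 2' | bc) GB"              # ~42 GB
echo "Q4, ~0.5 bytes per parameter: $(echo 'scale=1; 21 * 0.5' | bc) GB"   # ~10-12 GB with overhead
```

Even at 4-bit precision the full set of weights still has to be resident in memory; what changes with MoE, described next, is how much of that model does work for each token.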
+ +***[Mixture of Experts (MoE)](https://en.wikipedia.org/wiki/Mixture_of_experts)*** offers an alternative. +Instead of using all parameters all the time, MoE introduces a conditional computation mechanism: each input token only activates a small subset of model components (called ***experts***). +Think of it like having a team of specialists, and only calling the relevant few for a given task. This makes MoE ideal for environments where compute or memory is constrained, such as edge AI or embedded inference. + + +In MoE: +- The model consists of many expert sub-networks (e.g., 64 experts). +- For each input, a router selects only 2–4 experts to compute the result. +- The rest of the experts remain inactive, conserving memory and compute. + +This dynamic routing is typically learned during training. In inference, only a fraction of the model is active, leading to much lower compute and memory usage ***without sacrificing the total model capacity** or ***diversity of learned behaviors***. + + +## Benefits of MoE Architecture + +- Scalable Model Size: Increase total parameter count without linearly increasing inference cost. +- Efficient Inference: Lower memory and FLOPs per input. +- Modularity: Each expert can learn domain-specific patterns (e.g., finance, medicine, language). +- Specialization: Encourages the model to learn distinct processing behaviors across different experts. +- Routing Flexibility: Makes it easier to adapt to specific tasks using fine-tuned expert selection. + +## ERNIE-4.5: A MoE Model for Chinese NLP + +The [ERNIE-4.5](https://huggingface.co/collections/baidu/ernie-45) model family from [Baidu](https://huggingface.co/baidu) introduces a Mixture-of-Experts (MoE) architecture, which enables massive models (e.g., 21 billion parameters) to be deployed in constrained environments. MoE models dynamically activate only a small subset of parameters (e.g., 2–4 experts) during inference. +Specifically, ERNIE-4.5 uses a softmax-based router to select the top-6 experts from a pool of 64 per layer, activating only a subset dynamically per token. This makes runtime both efficient and adaptive. This architecture allows the model to retain high performance and generalization while drastically reducing inference-time resource requirements. + +ERNIE-4.5 Model Series: +- PT (Post-Trained): General-purpose language model trained on Chinese and English data. +- Thinking: Optimized for reasoning tasks with long context support and structured outputs. + +In this learning path, we focus on the [ERNIE-4.5 Thinking](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Thinking) variant as our primary model due to its enhancements for multi-step reasoning and long-context tasks. However, we also introduce the [PT (Post-Trained)](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-PT) variant to allow learners to compare model behavior across identical prompts, illustrating how task-specific tuning affects output quality. + +## Why MoE Matters for Edge Devices + +Deploying a 21B dense model on a CPU-only board is infeasible. But MoE changes that: + +| **Feature** | **Dense Model** | **MoE Model (e.g., ERNIE-4.5-21B)** | +|-----------------------|-----------------|---------------| +| `Total Parameters` | 21B | 21B | +| `Activated Parameters`| 21B | ~3B | +| `Memory Usage` | Very high | Moderate | +| `Inference Speed` | Slow | Fast | + +This efficiency enables powerful language models to be run locally on ARM-based platforms — making MoE not just a model design choice, but a deployment enabler. 
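You can sanity-check the per-token saving from the table above with a quick ratio. This is a rough figure that ignores the router, attention layers, and shared parameters:

```bash
# Fraction of parameters active per token, and the rough reduction in expert-layer compute
echo "scale=2; 3 / 21" | bc   # ~0.14 -> about 14% of the weights are touched per token
echo "scale=1; 21 / 3" | bc   # ~7x fewer FLOPs spent in the MoE layers per token
```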
+ +In the next module, you’ll bring this architecture to life — preparing a real Armv9 board, setting up llama.cpp, and verifying that a 21B MoE model like ERNIE-4.5 can run efficiently with no GPU required. diff --git a/content/learning-paths/cross-platform/ernie_moe_v9/2_llamacpp_installation.md b/content/learning-paths/cross-platform/ernie_moe_v9/2_llamacpp_installation.md new file mode 100644 index 0000000000..ca0d619820 --- /dev/null +++ b/content/learning-paths/cross-platform/ernie_moe_v9/2_llamacpp_installation.md @@ -0,0 +1,134 @@ +--- +title: Setting Up llama.cpp on Arm v9 develop board +weight: 3 +layout: "learningpathall" +--- + +## Setting Up llama.cpp on Arm v9 develop board + +In the previous section, you learned how Mixture-of-Experts (MoE) models reduce resource consumption by activating only a fraction of parameters. +Now, you'll walk through how to prepare your environment to deploy `ERNIE-4.5 MoE` models on an Armv9 platform using `llama.cpp`. + +In this module, you’ll verify model inference on Radxa O6 and validate multilingual outputs using ERNIE’s Thinking variant. + +This section prepares the foundation for deploying ERNIE-4.5 on an ARMv9 platform. You will begin by reviewing the hardware—specifically, the `Radxa O6` development board equipped with an Armv9 CPU. From there, you will install llama.cpp, a lightweight inference engine, build it from source, and download ERNIE-4.5 models in GGUF format (quantized to Q4 for efficient CPU inference). Finally, you will run a basic inference test to confirm that the environment is properly configured and ready for benchmarking and optimization in the next module. + +### Arm v9 development board + +In this learning path, we use the [Radxa O6](https://radxa.com/products/orion/o6/) — a compact Armv9 development board powered by the [CIX CD8180](https://en.cixtech.com/Personal-Computing/) SoC. It features: + +- 12-core Armv9.2 CPU +- Support for SVE, dotprod, and i8mm instruction sets +- Multiple HDMI, PCIe slot with Gen4x8, dual 5Gbps Ethernet Ports and USB-C for I/O expansion + +We chose this board because it balances affordability and performance. Most importantly, it supports vector instructions we’ll benchmark later in this path. + +The default system image for the board is [Debian](https://docs.radxa.com/en/orion/o6/debian/debian-user-guide), which includes a ready-to-use user environment. You can verify or reflash the OS by following the instructions on the Radxa O6 [download page](https://docs.radxa.com/en/orion/o6/download). + +With the Radxa O6 ready, let’s set up the software stack beginning with llama.cpp. + +### Step 1: Clone and Build llama.cpp + +First, ensure your system is up-to-date and install the required tools and libraries: + +```bash +sudo apt update +sudo apt install build-essential cmake python3 python3-pip htop +``` + +Next, build [llama.cpp](https://github.com/ggml-org/llama.cpp/), an open-source C++ framework for running and experimenting with large language models. Designed to be lightweight and fast, llama.cpp supports inference on edge devices (CPU-only) and implements many of the most popular LLM architectures. + +In the context of MoE models, `llama.cpp` currently supports: +- Openai-moe +- Oleo +- lm4-moe +- Qwen2 moe, Qwen3 moe +- Grok +- Ernie4.5 + +These models use diverse routing and expert management strategies, and llama.cpp provides a unified backend for efficient MoE inference. 
+For more in-depth coverage of llama.cpp capabilities and use cases, see those [learning paths](https://learn.arm.com/tag/llama.cpp/) for the detail. + +```bash +cd ~ +git clone https://github.com/ggerganov/llama.cpp.git +cd llama.cpp + +mkdir build && cd build +cmake .. +make -j$(nproc) +``` + +This will generate binaries like `llama-cli` under directory `~/llama.cpp/build/bin`, which we’ll use to run inference in later steps. +Once llama.cpp is compiled, we can now download the models we’ll use for evaluation. + + +### Step 2: Download ERNIE-4.5 Q4 GGUF Model + +In this learning path, you will use [ERNIE-4.5](https://huggingface.co/collections/baidu/ernie-45) to deploy in Arm v9. +Download both model variants so you can experiment later: + +```bash +mkdir -p ~/models/ernie-4.5 +cd ~/models/ernie-4.5 +wget https://modelscope.cn/models/unsloth/ERNIE-4.5-21B-A3B-PT-GGUF/resolve/master/ERNIE-4.5-21B-A3B-PT-Q4_0.gguf +wget https://modelscope.cn/models/unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF/resolve/master/ERNIE-4.5-21B-A3B-Thinking-Q4_0.gguf +``` + +You can see the size of both models are 12 GB and quantized to Q4, making them suitable for CPU-only inference. + +{{% notice Note %}} +The Q4 quantized models reduce memory footprint and allow CPU‑only inference — you’ll still need around 12 GB of RAM for good performance. +{{% /notice %}} + +While both the Thinking and PT variants of ERNIE-4.5 share the same MoE architecture, they are fine-tuned for different objectives. The Thinking model is optimized for logical reasoning and structured generation, making it the main focus of subsequent benchmarking and hardware optimization. You are encouraged to install both variants and observe behavioral differences using the same prompt. + +### Step 3: Run a Basic Inference Test + +Navigate to the build directory and run the following command to verify that the model loads correctly and supports multilingual input: + +```bash +cd ~/llama.cpp/build +./bin/llama-cli \ + --jinja \ + -m ~/models/ernie-4.5/ERNIE-4.5-21B-A3B-Thinking-Q4_0.gguf \ + -p "Please introduce Mixture of Experts in Chinese." \ + -c 4096 -t 12 \ + --jinja +``` + +Note the flags: +- ***-p***: Passes the input prompt directly as a string. +- ***-c 4096***: Sets the context length (in tokens). A longer context allows the model to “remember” more input text, which is crucial for long-form tasks. Here we use the recommended 4096 tokens. +- ***-t 12***: Specifies the number of CPU threads used for inference. You should match this number to the physical cores (or logical threads) available on your system to maximize performance. +- ***--jinja***: Enables Jinja‑style prompt templates. Many Chinese‑oriented MoE models rely on this template format for structured inputs. + +If everything is set up correctly, you will see metadata output from llama.cpp indicating the model’s architecture and size: + +``` +print_info: model type = 21B.A3B +print_info: model params = 21.83 B +print_info: general.name = Ernie-4.5-21B-A3B-Thinking +``` + +Once inference is complete, the expected output will look like this (in Chinese): + +![img1 alt-text#center](meo_result.png "Introduce Mixture of Experts in Chinese") + +This answer demonstrates the model’s multilingual and structured reasoning ability. 
It begins with a narrative explanation introducing the concept of Mixture of Experts (MoE), followed by a well-organized markdown-style summary with section headers and bullet points: +- Concept breakdown: It describes MoE as a model combining multiple specialized sub-models (experts) and a routing mechanism (gateway) to activate only a few experts per input. +- Workflow explanation: It explains how routing selects top experts based on input features and how the system reduces computation by only activating a small number of experts per token. +- Chinese NLP examples: It gives examples such as word segmentation and translation, explaining how different experts may focus on distinct subtasks. + +This confirms: +- The GGUF model is successfully loaded. +- The llama.cpp build functions as expected. +- CPU-only inference on Armv9 is working. + +#### Why This Prompt Matters + +This prompt, “Please introduce Mixture of Experts in Chinese.”, was chosen for its dual pedagogical value: +- ***Bilingual Capability Check***: The instruction is issued in English, but the answer is expected in Chinese. This helps confirm that ERNIE-4.5’s multilingual support is active and effective. +- ***MoE Behavior Engagement***: The topic itself — explaining “Mixture of Experts” — requires combining multiple sub-skills: technical understanding, translation, and structured explanation. This likely triggers different experts within the model to contribute during inference. Even though routing isn’t explicitly logged, the richness and precision of the output suggest that MoE routing is functioning as designed. This kind of prompt increases the likelihood of multiple experts being activated simultaneously—e.g., language generation, machine learning knowledge, and Chinese translation. + +By using a single prompt, you verify setup correctness, observe output quality, and gain insight into MoE inference characteristics — all essential elements before moving on to hardware-specific performance tuning. diff --git a/content/learning-paths/cross-platform/ernie_moe_v9/3_erine_moe.md b/content/learning-paths/cross-platform/ernie_moe_v9/3_erine_moe.md new file mode 100644 index 0000000000..1dc710952b --- /dev/null +++ b/content/learning-paths/cross-platform/ernie_moe_v9/3_erine_moe.md @@ -0,0 +1,179 @@ +--- +title: Comparing ERNIE PT vs Thinking and Expert Routing +weight: 4 +layout: "learningpathall" +--- + +## Compare ERNIE PT vs Thinking Model Behavior + +Now that both ERNIE-4.5 models are installed and verified, you can compare their output behavior on the same task. + +In this module, you’ll compare the inference styles of PT and Thinking models, and learn how to inspect internal MoE expert routing behavior during generation. + +With both ERNIE-4.5 models installed and ready, we can now observe how their tuning objectives affect output—even though both share the same MoE architecture and parameter count (~21B total, ~3B activated at runtime): +- PT (Post-Trained): General-purpose, trained on multilingual corpora. +- Thinking: Tuned for multi-step reasoning, long context, and structured response generation. + +### Example Task: Product Behavior Analysis +Copy the following prompt into a file named `prompt1.txt`: + +``` +You are a fitness brand strategist. +User profile: Buys protein powder + dumbbells + gym wear, works out at home 4‑5× per week, shares results online, now exploring recovery nutrition and smart gym gear. +Task: +1. Identify their top motivation and one hidden pain point. +2. Propose one new product line. +3. 
Create a short marketing tagline (≤ 15 words). +``` + +Run the prompt using both models: + +***PT Variant:*** +```bash +./bin/llama-cli \ + --jinja \ + -m ~/models/ernie-4.5/ERNIE-4.5-21B-A3B-PT-Q4_0.gguf \ + -f prompt1.txt \ + -c 4096 -t 12 \ + --jinja +``` + +The answer will looks like: + +``` +Assistant: 1. **Top Motivation**: Achieving visible results and maintaining progress through efficient recovery nutrition. +**Hidden Pain Point**: Balancing high-intensity training with optimal recovery nutrition during busy workouts. +2. **New Product Line**: *Smart Recovery Meal Kits* – Customizable, nutrient-dense, and easy-to-prepare post-workout meals designed for quick consumption and recovery optimization. +3. **Marketing Tagline**: "Fuel Progress, Recover Faster – Smart Nutrition for the Hard Worker." +``` + +The answer shows: +- Delivers conclusions directly: motivations and pain points are briefly mentioned with little reasoning. +- Product ideas are sensible but templated: suggestions like “Smart Recovery Meal Kits” are plausible but lack contextual grounding in user behavior. +- Tagline reads like a standard advertisement: e.g., “Fuel Progress, Recover Faster” feels promotional rather than personalized. + + +***Thinking Variant:*** +```bash +./bin/llama-cli \ + --jinja \ + -m ~/models/ernie-4.5/ERNIE-4.5-21B-A3B-Thinking-Q4_0.gguf \ + -f prompt1.txt \ + -c 4096 -t 12 \ + --jinja +``` + +You will see the more comprehensive answer like: + +``` + assistant + +Let's tackle this user profile. The user buys protein powder, dumbbells, and gym wear. They work out 4-5 times a week at home and share results online. Now they're exploring recovery nutrition and smart gym gear. + +First, their top motivation. They're into sharing results online, so maybe they want to inspire others or gain recognition. That's a big motivation. But what's a hidden pain point? They're moving into recovery nutrition and smart gear. Maybe they find it hard to track recovery progress or ensure their nutrition is optimized for recovery. Smart gear could help, but maybe they don't have the right tools to monitor recovery metrics like sleep, nutrition, or muscle soreness. That's a hidden pain point since they're exploring it. + +Next, propose a new product line. Since they're into smart gym gear, maybe a smart recovery watch that tracks sleep, activity, and recovery metrics. Or a nutrition tracking device that syncs with their protein powder and dumbbells. Wait, they might need something that integrates with their existing products. Maybe a smart scale that tracks muscle and body composition, connected to their fitness apps. Or a wearable device that monitors recovery parameters like heart rate variability and sleep. + +Marketing tagline needs to be concise, under 15 words. Combine motivation and product. For example, "Track Your Recovery, Maximize Your Gains—Smart Gear for Every Workout." But maybe more catchy. "Smarter Recovery, Stronger Results. Your Workout, Tracked." Hmm, not sure. Let me think. Maybe "Track Recovery, Fuel Progress. Smart Gear for Every Muscle." Wait, the user shares results online, so maybe highlight community or shared progress. "Share. Track. Conquer. Smart Gear for Your Recovery Journey." That's 10 words. Or "Your Workout, Tracked. Recovery, Simplified. Smart Gear Now." Maybe too vague. Let me refine. "Track. Recover. Share. Smart Gear for Your Journey." 8 words. Maybe that's better. Or "Recovery Tracked. Gains Shared. Smart Gear for Every Muscle." 9 words. Not sure. 
Alternatively, focus on the product. "Smart Recovery, Shared Success. Your Gear, Now Smarter." 7 words. Maybe that's better. + +Wait, the user is moving into recovery nutrition and smart gear. So the product line could be a smart recovery tracker. The tagline should tie their motivation (sharing results) with the new product. Maybe "Track. Recover. Share. Smart Gear for Your Journey." 8 words. Or "Your Workouts, Tracked. Recovery, Simplified. Smart Gear Now." 9 words. Maybe that works. + +So to sum up: + +1. Top motivation: Inspiring others through shared results and progress tracking. Hidden pain point: Difficulty tracking recovery metrics and nutrition effectively. + +2. Product line: Smart Recovery Tracker (wearable or app integrating with gear) that monitors sleep, nutrition, and recovery metrics, syncing with their existing products. + +3. Tagline: "Track. Recover. Share. Smart Gear for Your Journey." (8 words) + + + +1. **Top Motivation**: Inspiring others through shared results and progress tracking. +2. **Hidden Pain Point**: Struggling to track recovery metrics (sleep, nutrition, muscle soreness) effectively. +3. **Product Line**: **Smart Recovery Tracker** (wearable monitoring sleep/nutrition, syncing with protein powder/dumbbells to optimize recovery). +4. **Tagline**: *"Track. Recover. Share. Smart Gear for Your Journey."* (8 words) + + +``` + +The answer shows: +- Responses show deeper layering: begins with inferring user motivation, iterates through possible product strategies, and only then crafts a refined tagline. +- Transparent reasoning process: the model “thinks out loud” (“let me try again… maybe that’s too long…”), mimicking human deliberation. +- Marketing language reflects user mindset: taglines like “Share. Track. Conquer.” directly appeal to community sharing and progress motivation. + + +### Compare the Outputs +After execution, review the responses and compare them along the following dimensions: + +| **Aspect** | **PT Model** | **Thinking Model** | +|----------------------|---------------------------------------------------|--------------------------------------------------------------------------------------| +| `Language Style` | Direct, ad-like tone | Layered and natural, mimics human internal reasoning | +| `Reasoning Depth` | High-level summarization with minimal exploration | Step-by-step inference with intermediate reflections and multiple alternatives | +| `Suggested Actions` | Pre-packaged idea (e.g., meal kits) | Context-aware concepts (e.g., smart gear that integrates with current user behavior) | + + + +## Observe MoE Expert Routing with Debug Logs + +If you want to look under the hood of the MoE model, you’ll now add debug log to observe internal MoE behavior which experts are routed during inference. + +Open `src/models/ernie4‑5‑moe.cpp` in the llama.cpp repository and locate the function build_moe_ffn(). +Insert a print statement right after the top‑k expert selection. 
For example: + +```c +printf("---[DEBUG]--- entering build_moe_ffn at layer %d with %d experts (use %d)\n", il, n_expert, n_expert_used); +``` + +Rebuild llama.cpp: + +```bash +cd ~/llama.cpp +make -j$(nproc) +``` + +Run inference with the same prompt and monitor the console for lines such as: +``` +---[DEBUG]--- entering build_moe_ffn at layer 1 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 2 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 3 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 4 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 5 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 6 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 7 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 8 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 9 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 10 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 11 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 12 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 13 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 14 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 15 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 16 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 17 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 18 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 19 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 20 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 21 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 22 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 23 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 24 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 25 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 26 with 64 experts (use 64) +---[DEBUG]--- entering build_moe_ffn at layer 27 with 64 experts (use 64) +``` + +This reveals how many experts (e.g., 6) and how many tokens (e.g., 16) were routed at that layer. + +{{% notice Note %}} +You can also trace the function `llm_graph_context::build_moe_ffn()` inside the `src/llama-graph.cpp` about how to select the expert. +{{% /notice %}} + +What to observe: +- Whether the number of active experts changes between the PT and Thinking models. +- Patterns in routing: e.g., different token batches routing to differing expert sets. +- Correlate routing behaviour with output differences: deeper routing variety may align with more detailed responses. + +## Takeaway +This task highlights the advantage of MoE fine-tuning: even under the same architecture, thoughtful tuning can significantly change a model’s reasoning behavior. It also reinforces that Thinking is better suited for applications requiring analytical depth—ideal for edge AI scenarios like customer profiling or real-time recommendations. + +In the next section, you’ll switch focus from model behavior to system-level performance — compiling with Armv9 instruction sets and measuring the impact on inference speed. 
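As an optional extra before moving on, you can condense the routing debug output into a per-layer summary instead of scrolling through it. This assumes the `printf` added above is in place and that `prompt1.txt` is the prompt file from the comparison task:

```bash
# Count how many times each layer's MoE FFN was entered during one run
./bin/llama-cli --jinja \
  -m ~/models/ernie-4.5/ERNIE-4.5-21B-A3B-Thinking-Q4_0.gguf \
  -f prompt1.txt -c 4096 -t 12 2>&1 \
  | grep "build_moe_ffn" \
  | sort | uniq -c | sort -rn | head
```

Each identical debug line is counted once per forward pass, so the counts give a rough view of how often each layer's experts were exercised for the PT and Thinking runs.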
diff --git a/content/learning-paths/cross-platform/ernie_moe_v9/4_v9_optimization.md b/content/learning-paths/cross-platform/ernie_moe_v9/4_v9_optimization.md new file mode 100644 index 0000000000..41dc5e9be1 --- /dev/null +++ b/content/learning-paths/cross-platform/ernie_moe_v9/4_v9_optimization.md @@ -0,0 +1,132 @@ +--- +title: Arm v9 Optimization and MoE Efficiency +weight: 5 +layout: "learningpathall" +--- + +## Accelerate ERNIE-4.5 with Armv9 Optimizations + +In previous modules, you've learned how MoE enables large model deployment on CPUs, and how to observe inference behavior with ERNIE-4.5. Now, we'll optimize performance using Armv9 architecture features and benchmark the improvements. + +This section shows how to benchmark performance under two scenarios: with and without Armv9 vector instruction optimizations. + +We’ll compare: +- Baseline: regular CPU build +- Optimized: Armv9-specific build with SVE/i8mm/dotprod enabled + +To establish a baseline performance, let’s first compile llama.cpp without Armv9 optimizations. + +### Disable llama.cpp v9 Optimizations + +This step builds `llama.cpp` without Armv9 vector features to establish a baseline. + +```bash +cd ~/llama.cpp +mkdir build_v9_off && cd build_v9_off +cmake \ + -DLLAMA_CURL=OFF \ + -DGGML_LLAMAFILE=OFF \ + -DGGML_VULKAN=OFF \ + -DBUILD_SHARED_LIBS=OFF \ + -DCMAKE_SYSTEM_PROCESSOR=arm64 \ + -DCMAKE_OSX_ARCHITECTURES=arm64 \ + -DGGML_NATIVE=OFF \ + -DGGML_AVX=off \ + -DGGML_AVX2=off \ + -DGGML_AVX512=off \ + -DGGML_FMA=off \ + -DGGML_F16C=off \ + -DGGML_CPU_KLEIDIAI=OFF \ + .. + +make -j$(nproc) +``` + +Then run benchmark in `build_v9_off` directory. + +```bash +./bin/llama-bench -m ~/models/ernie-4.5/ERNIE-4.5-21B-A3B-Thinking-Q4_0.gguf -pg 128,128 -t 8 +``` + +The result for 24GB Radxa O6 will be: + +| model | size | params | backend | threads | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | ------: | --------------: | -------------------: | +| ernie4_5-moe 21B.A3B Q4_0 | 11.64 GiB | 21.83 B | CPU | 8 | pp512 | 14.96 ± 0.01 | +| ernie4_5-moe 21B.A3B Q4_0 | 11.64 GiB | 21.83 B | CPU | 8 | tg128 | 12.03 ± 0.02 | +| ernie4_5-moe 21B.A3B Q4_0 | 11.64 GiB | 21.83 B | CPU | 8 | pp128+tg128 | 13.51 ± 0.03 | + +With the baseline captured, we now recompile with Armv9 vector extensions enabled. + +### Enable llama.cpp v9 Optimizations + +Now rebuild with vector extensions enabled (i8mm, dotprod, SVE) by following configuration setting. + +```bash +cd ~/llama.cpp +mkdir build_v9_on && cd build_v9_on +cmake \ + -DLLAMA_CURL=OFF \ + -DGGML_LLAMAFILE=OFF \ + -DGGML_VULKAN=OFF \ + -DBUILD_SHARED_LIBS=OFF \ + -DCMAKE_SYSTEM_PROCESSOR=armv9-a \ + -DCMAKE_OSX_ARCHITECTURES=arm64 \ + -DGGML_NATIVE=OFF \ + -DGGML_AVX=off \ + -DGGML_AVX2=off \ + -DGGML_AVX512=off \ + -DGGML_FMA=off \ + -DGGML_F16C=off \ + -DGGML_CPU_ARM_ARCH=armv9-a+i8mm+dotprod+sve \ + -DGGML_CPU_KLEIDIAI=ON \ + .. + +make -j$(nproc) +``` + +{{% notice Note %}} +We disable GPU and other backend support to focus exclusively on CPU performance and optimization for this learning path. +{{% /notice %}} + +Then re-run benchmark in `build_v9_on` directory. 
+ +```bash +./bin/llama-bench -m ~/models/ernie-4.5/ERNIE-4.5-21B-A3B-Thinking-Q4_0.gguf -pg 128,128 -t 8 +``` + +The result for 24GB Radxa O6 will be: + +| model | size | params | backend | threads | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | ------: | --------------: | -------------------: | +| ernie4_5-moe 21B.A3B Q4_0 | 11.64 GiB | 21.83 B | CPU | 8 | pp512 | 38.51 ± 0.11 | +| ernie4_5-moe 21B.A3B Q4_0 | 11.64 GiB | 21.83 B | CPU | 8 | tg128 | 15.96 ± 0.08 | +| ernie4_5-moe 21B.A3B Q4_0 | 11.64 GiB | 21.83 B | CPU | 8 | pp128+tg128 | 21.58 ± 0.11 | + + +Let’s now compare the results side-by-side to see how much performance is gained. + +### Comparing Performance: Armv9 Optimization Results + +After running benchmarks with and without Armv9-specific instructions, the results show significant gains. + +| Test | v9 off | v9 on | Gain | +|---------------|-----------------|----------------|---------| +| pp512 | 14.96 token/s | 38.51 token/s | 2.57x | +| tg128 | 12.03 token/s | 15.96 token/s | 1.32x | +| pp128 + tg128 | 13.51 token/s | 21.58 token/s | 1.59x | + +- Vectorized kernels (i8mm, dotprod, SVE) drastically improve inference throughput. +- The pp512 test shows the most significant acceleration, delivering a 2.57× improvement. +- Other patterns like tg128 and pp128+tg128 also achieve measurable gains, demonstrating the broad benefit of hardware-aware builds. +- Armv9 optimization enables practical real-time inference for 21B models on edge-class hardware. + + +### Summary +Over this learning path, you've walked through every stage of deploying a 21B parameter Chinese MoE model on edge-class Armv9 hardware. You: +- Understood how MoE reduces memory usage by only activating a small subset of parameters per token. +- Set up llama.cpp and deployed ERNIE-4.5 on a Radxa O6 board. +- Compared ERNIE-4.5 Thinking and PT model behaviors and examined expert routing logic with debug instrumentation. +- Applied Armv9 hardware optimizations to unlock over 2.5× speed improvements in token throughput. + +You now have the full-stack capabilities to deploy, profile, and tune Chinese LLMs for efficient inference on modern Arm CPUs. diff --git a/content/learning-paths/cross-platform/ernie_moe_v9/_index.md b/content/learning-paths/cross-platform/ernie_moe_v9/_index.md new file mode 100644 index 0000000000..3dac628c68 --- /dev/null +++ b/content/learning-paths/cross-platform/ernie_moe_v9/_index.md @@ -0,0 +1,63 @@ +--- +title: Running ERNIE Mixture of Experts (MoE) Models on Armv9 with llama.cpp + +draft: true +cascade: + draft: true + +minutes_to_complete: 60 + +who_is_this_for: This learning path is designed for developers and engineers looking to deploy Mixture-of-Experts (MoE) models — such as ERNIE-4.5 — on edge-class devices. MoE architectures allow massive LLMs (21B+ parameters) to run with only a fraction of their weights active per inference, making them ideal for resource-constrained environments. + +learning_objectives: + - Understand how MoE models like ERNIE-4.5 enable large-scale inference on edge devices. + - Set up and execute ERNIE-4.5 (PT and Thinking versions) using llama.cpp and compare the inference behavior. + - Analyze the performance impact of enabling Armv9-specific hardware optimizations. + +prerequisites: + - One Arm V9 device at least 32GB of available disk space. 
In this learning path, I use [Radxa O6](https://radxa.com/products/orion/o6/) + +author: Odin Shen + +### Tags +skilllevels: Introductory +subjects: ML +armips: + - Cortex-A +operatingsystems: + - Linux +tools_software_languages: + - Python + - C++ + - Bash + - llama.cpp + +### Cross-platform metadata only +shared_path: true +shared_between: + - laptops-and-desktops + - servers-and-cloud-computing + - iot + - mobile-graphics-and-gaming + +further_reading: + - resource: + title: ERNIE-4.5-21B Modelscope link + link: https://modelscope.cn/models/unsloth/ERNIE-4.5-21B-A3B-PT-GGUF + type: website + - resource: + title: llama.cpp github repo + link: https://github.com/ggml-org/llama.cpp.git + type: documentation + - resource: + title: Arm Learning Path + link: https://learn.arm.com/learning-paths/servers-and-cloud-computing/llama_cpp_streamline/ + type: Learning Path + + +### FIXED, DO NOT MODIFY +# ================================================================================ +weight: 1 # _index.md always has weight of 1 to order correctly +layout: "learningpathall" # All files under learning paths have this same wrapper +learning_path_main_page: "yes" # This should be surfaced when looking for related content. Only set for _index.md of learning path content. +--- diff --git a/content/learning-paths/cross-platform/ernie_moe_v9/_next-steps.md b/content/learning-paths/cross-platform/ernie_moe_v9/_next-steps.md new file mode 100644 index 0000000000..c3db0de5a2 --- /dev/null +++ b/content/learning-paths/cross-platform/ernie_moe_v9/_next-steps.md @@ -0,0 +1,8 @@ +--- +# ================================================================================ +# FIXED, DO NOT MODIFY THIS FILE +# ================================================================================ +weight: 21 # Set to always be larger than the content in this path to be at the end of the navigation. +title: "Next Steps" # Always the same, html page title. +layout: "learningpathall" # All files under learning paths have this same wrapper for Hugo processing. 
+--- diff --git a/content/learning-paths/cross-platform/ernie_moe_v9/meo_result.png b/content/learning-paths/cross-platform/ernie_moe_v9/meo_result.png new file mode 100644 index 0000000000..a5a34df995 Binary files /dev/null and b/content/learning-paths/cross-platform/ernie_moe_v9/meo_result.png differ diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/ci-cd-new.png b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/ci-cd-new.png deleted file mode 100644 index 17da0dab81..0000000000 Binary files a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/ci-cd-new.png and /dev/null differ diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/ci-cd-new.webp b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/ci-cd-new.webp new file mode 100644 index 0000000000..60103e620b Binary files /dev/null and b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/ci-cd-new.webp differ diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/editor-yml.png b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/editor-yml.png deleted file mode 100644 index 3cef6e3a2d..0000000000 Binary files a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/editor-yml.png and /dev/null differ diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/editor-yml.webp b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/editor-yml.webp new file mode 100644 index 0000000000..c8ee6c7c3b Binary files /dev/null and b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/editor-yml.webp differ diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/new-pipeline.png b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/new-pipeline.png deleted file mode 100644 index 5bba639d81..0000000000 Binary files a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/new-pipeline.png and /dev/null differ diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/new-pipeline.webp b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/new-pipeline.webp new file mode 100644 index 0000000000..aa51494dba Binary files /dev/null and b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/new-pipeline.webp differ diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/pipeline-execution.png b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/pipeline-execution.png deleted file mode 100644 index 5e516c0fc4..0000000000 Binary files a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/pipeline-execution.png and /dev/null differ diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/pipeline-execution.webp b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/pipeline-execution.webp new file mode 100644 index 0000000000..e6c27bbfe6 Binary files /dev/null and b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/pipeline-execution.webp differ diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/visual-pipeline.png b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/visual-pipeline.png deleted file mode 100644 index 0a6728eb18..0000000000 Binary files 
a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/visual-pipeline.png and /dev/null differ diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/_images/visual-pipeline.webp b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/visual-pipeline.webp new file mode 100644 index 0000000000..3fe2d5eca1 Binary files /dev/null and b/content/learning-paths/cross-platform/gitlab-managed-runners/_images/visual-pipeline.webp differ diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/pipeline.md b/content/learning-paths/cross-platform/gitlab-managed-runners/pipeline.md index 2a218102a7..e8cb26a591 100644 --- a/content/learning-paths/cross-platform/gitlab-managed-runners/pipeline.md +++ b/content/learning-paths/cross-platform/gitlab-managed-runners/pipeline.md @@ -17,7 +17,7 @@ Once we run our pipeline with the correct **`tags`** Gitlab will create everythi 1. Start by going to the main project page where we will need to Create the CI/CD pipeline. 2. We can choose to create **`.gitlab-ci.yml`** file by using one of the 2 options circled in red in the image below. -![CI-CD-New #center](_images/ci-cd-new.png) +![CI-CD-New #center](_images/ci-cd-new.webp) Option1: We can Click on **`Set up CI/CD`** button/link and follow the wizad to create an empty **`.gitlab-ci.yml`** file. @@ -25,7 +25,7 @@ Option2: Click on the "+" button. From the popup menu click on **`New File`** op ![New-YML #center](_images/new-yml.png) 3. A page like the one in the image below will be visible with our **`.gitlab-ci.yml`** file. From here, we will need to Click on the **`Edit`** button. A menu will pop up, We will click on **`Edit in pipeline Editor`** which will allow us to add our CD/CD script. -![Editor-YML #center](_images/editor-yml.png) +![Editor-YML #center](_images/editor-yml.webp) 4. In the pipeline editor, just copy and paste the following YML script and click on commit changes (Add any relevent message as your commit update message). ```YML diff --git a/content/learning-paths/cross-platform/gitlab-managed-runners/results.md b/content/learning-paths/cross-platform/gitlab-managed-runners/results.md index 3507c8bad9..c4680bf19e 100644 --- a/content/learning-paths/cross-platform/gitlab-managed-runners/results.md +++ b/content/learning-paths/cross-platform/gitlab-managed-runners/results.md @@ -41,14 +41,14 @@ Gitlab offers a lot of documentions on how to create pipeline that fits differen ## How to run your pipeline for testing and to check the results: From the left hand side panel, Navigate to **`Build`** then to **`Pipeline`** then click on **`New pipeline`** button on the top right hand side just like the image below. In the new window click on **`New pipeline`** button again and your pipeline will start to execute. -![New-Pipeline #center](_images/new-pipeline.png) +![New-Pipeline #center](_images/new-pipeline.webp) To check the status of your pipeline and to check the output of any of it's Jobs simply click on any of the **`Jobs`** as the image below (with red rectangle around them). -![pipeline-execution #center](_images/pipeline-execution.png) +![pipeline-execution #center](_images/pipeline-execution.webp) ## Gitlab Helpful tools -If you navigate to your pipeline editor from before you will notice that there are more tabs in that page other than the **`Edit`** tab. 
![visual-pipeline #center](_images/visual-pipeline.png) +If you navigate to your pipeline editor from before you will notice that there are more tabs in that page other than the **`Edit`** tab. ![visual-pipeline #center](_images/visual-pipeline.webp) ### The other Tabs are: diff --git a/content/learning-paths/cross-platform/multiplying-matrices-with-sme2/1-get-started.md b/content/learning-paths/cross-platform/multiplying-matrices-with-sme2/1-get-started.md index 0b0cda642d..8c5b2e3bfe 100644 --- a/content/learning-paths/cross-platform/multiplying-matrices-with-sme2/1-get-started.md +++ b/content/learning-paths/cross-platform/multiplying-matrices-with-sme2/1-get-started.md @@ -318,6 +318,7 @@ These Apple devices support SME2 natively. | Device | Release Date | Chip Options | |-------------------------------------|--------------|---------------------------| | iPhone 16 | 2024 | A18 | +| iPhone 17 | 2025 | A19 | | iPad Pro (7th generation) | 2024 | M4 | | iMac (2024) | 2024 | M4 | | Mac Mini (2024) | 2024 | M4, M4 Pro, M4 Max | diff --git a/content/learning-paths/iot/_index.md b/content/learning-paths/iot/_index.md index 4f5be1e835..f3194ebb30 100644 --- a/content/learning-paths/iot/_index.md +++ b/content/learning-paths/iot/_index.md @@ -12,11 +12,11 @@ subjects_filter: - CI-CD: 4 - Containers and Virtualization: 2 - Embedded Linux: 2 -- ML: 2 +- ML: 3 - Performance and Architecture: 3 operatingsystems_filter: - Baremetal: 4 -- Linux: 9 +- Linux: 10 - macOS: 2 - RTOS: 2 - Windows: 2 @@ -28,13 +28,16 @@ tools_software_languages_filter: - Azure: 1 - balenaCloud: 1 - BalenaOS: 1 +- Bash: 1 - C: 1 +- C++: 1 - Docker: 2 - FVP: 1 - GitHub: 3 +- llama.cpp: 1 - Matter: 1 - MCP: 1 -- Python: 2 +- Python: 3 - Raspberry Pi: 4 - Remote.It: 1 - ROS 2: 1 diff --git a/content/learning-paths/laptops-and-desktops/_index.md b/content/learning-paths/laptops-and-desktops/_index.md index ada046b622..b116457f4c 100644 --- a/content/learning-paths/laptops-and-desktops/_index.md +++ b/content/learning-paths/laptops-and-desktops/_index.md @@ -9,14 +9,14 @@ maintopic: true operatingsystems_filter: - Android: 2 - ChromeOS: 2 -- Linux: 37 +- Linux: 38 - macOS: 10 - Windows: 46 subjects_filter: - CI-CD: 6 - Containers and Virtualization: 7 - Migration to Arm: 30 -- ML: 4 +- ML: 5 - Performance and Architecture: 28 subtitle: Create and migrate apps for power efficient performance title: Laptops and Desktops @@ -28,10 +28,10 @@ tools_software_languages_filter: - Arm Performance Libraries: 2 - Arm64EC: 1 - Assembly: 1 -- Bash: 2 +- Bash: 3 - C: 10 - C#: 6 -- C++: 11 +- C++: 12 - CCA: 1 - Clang: 13 - CMake: 3 @@ -54,7 +54,7 @@ tools_software_languages_filter: - Kubernetes: 1 - KVM: 1 - Linux: 1 -- llama.cpp: 2 +- llama.cpp: 3 - LLM: 1 - LLVM: 2 - llvm-mca: 1 @@ -67,7 +67,7 @@ tools_software_languages_filter: - OpenCV: 1 - perf: 4 - PowerShell: 1 -- Python: 8 +- Python: 9 - QEMU: 1 - Qt: 2 - RDP: 1 diff --git a/content/learning-paths/mobile-graphics-and-gaming/_index.md b/content/learning-paths/mobile-graphics-and-gaming/_index.md index 85a5b968cc..0eaefddaa4 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/_index.md +++ b/content/learning-paths/mobile-graphics-and-gaming/_index.md @@ -9,14 +9,14 @@ key_ip: - Mali maintopic: true operatingsystems_filter: -- Android: 32 -- Linux: 32 +- Android: 33 +- Linux: 33 - macOS: 14 - Windows: 14 subjects_filter: - Gaming: 6 - Graphics: 6 -- ML: 14 +- ML: 16 - Performance and Architecture: 35 subtitle: Optimize Android apps and build faster games using cutting-edge Arm 
tech title: Mobile, Graphics, and Gaming @@ -31,10 +31,11 @@ tools_software_languages_filter: - Arm Mobile Studio: 1 - Arm Performance Studio: 3 - Assembly: 1 +- Bash: 1 - Bazel: 1 -- C: 4 +- C: 5 - C#: 3 -- C++: 13 +- C++: 14 - CCA: 1 - Clang: 12 - CMake: 2 @@ -53,6 +54,7 @@ tools_software_languages_filter: - KleidiAI: 2 - Kotlin: 8 - LiteRT: 1 +- llama.cpp: 1 - LLM: 1 - LLVM: 1 - llvm-mca: 1 @@ -61,7 +63,7 @@ tools_software_languages_filter: - NEON: 1 - ONNX Runtime: 1 - OpenGL ES: 1 -- Python: 5 +- Python: 7 - PyTorch: 2 - QEMU: 1 - RenderDoc: 1 diff --git a/content/learning-paths/mobile-graphics-and-gaming/litert-sme/1-litert-kleidiai-sme2.md b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/1-litert-kleidiai-sme2.md new file mode 100644 index 0000000000..c7d60425c5 --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/1-litert-kleidiai-sme2.md @@ -0,0 +1,52 @@ +--- +title: Explore LiteRT, XNNPACK, KleidiAI, and SME2 +weight: 2 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Inside the LiteRT software stack + +LiteRT (Lightweight Runtime, formerly TensorFlow Lite) is a runtime for on-device AI on Arm platforms. The default CPU acceleration library used by LiteRT is XNNPACK. + +XNNPACK is an open-source library that provides highly optimized implementations of neural-network operators. It continuously integrates the KleidiAI library to use new CPU features such as Scalable Matrix Extension 2 (SME2). + +KleidiAI is a library developed by Arm that offers performance-critical micro-kernels using Arm architecture features, such as SME2. + +The software stack for LiteRT is shown below. + +![Diagram showing the software stack for on-device AI on Arm platforms. The stack is organized in layers from top to bottom: LiteRT at the top, followed by XNNPACK, then KleidiAI, and SME2 at the bottom. Arrows indicate the flow of execution from LiteRT through XNNPACK to KleidiAI and SME2. The diagram includes the following text labels: LiteRT, XNNPACK, KleidiAI, SME2. The environment is technical and structured, emphasizing the integration of Arm-optimized libraries for efficient AI inference. alt-text#center](./litert-sw-stack.png "LiteRT, XNNPACK, KleidiAI, and SME2 software stack") + +## How KleidiAI works in LiteRT + +To understand how KleidiAI SME2 micro-kernels work in LiteRT, think about a LiteRT model with one fully connected operator using the FP32 data type. The following diagrams illustrate the execution workflow of XNNPACK's implementation compared with the workflow when KleidiAI SME2 is enabled in XNNPACK. + +### LiteRT → XNNPACK workflow + +![Diagram showing the workflow for a fully connected operator in LiteRT using XNNPACK. The diagram depicts the flow from LiteRT to XNNPACK, highlighting the use of NEON instructions for matrix multiplication and weight packing on Arm platforms. The technical environment emphasizes operator traversal, hardware detection, and parallel computation. alt-text #center](./litert-xnnpack-workflow.png "LiteRT, XNNPACK workflow") +A fully connected operator multiplies two matrices: the input activations (LHS) and the weights (RHS). + +When LiteRT loads a model, it reads the operators and builds a computation graph. If you select the CPU as the accelerator, LiteRT uses XNNPACK by default. + +XNNPACK scans the computation graph and looks for operators it can optimize. It packs the weight matrix to prepare for efficient computation. 
On Arm platforms, XNNPACK uses NEON instructions to speed up this packing and the matrix multiplication. + +At runtime, XNNPACK checks the hardware and chooses the best available micro-kernel. During inference, it splits the matrices into smaller tiles and runs the multiplications in parallel across multiple threads, using NEON instructions for faster processing. + +### LiteRT → XNNPACK → KleidiAI workflow + +![Diagram showing the workflow for a fully connected operator in LiteRT using XNNPACK and KleidiAI with SME2. The diagram illustrates the flow from LiteRT to XNNPACK, then to KleidiAI, highlighting SME2 micro-kernel integration for matrix multiplication and packing. The technical context emphasizes runtime hardware detection and optimized operator execution. alt-text #center](./litert-xnnpack-kleidiai-workflow.png "LiteRT, XNNPACK, KleidiAI workflow") + +When KleidiAI and SME2 are enabled at build time, the KleidiAI SME2 micro-kernels are compiled into XNNPACK. + +During the model loading stage, when XNNPACK optimizes the subgraph, it checks the operator’s data type to determine whether a KleidiAI implementation is available. If KleidiAI supports it, XNNPACK bypasses its own default implementation. As a result, RHS packing is performed using the KleidiAI SME packing micro-kernel. Because KleidiAI typically requires packing of the LHS, a flag is also set during this stage. + +During model inference, the LHS packing micro-kernel is invoked. After the LHS is packed, XNNPACK performs the matrix multiplication. At this point, the KleidiAI SME micro-kernel is used to compute the matrix product. + +## What you've accomplished and what's next + +In this section, you explored how LiteRT leverages XNNPACK and KleidiAI to accelerate fully connected operators on Arm platforms. You learned how XNNPACK uses NEON instructions for efficient matrix operations and how enabling KleidiAI with SME2 further optimizes performance by introducing specialized micro-kernels for packing and matrix multiplication. + +You have completed the overview of LiteRT, XNNPACK, KleidiAI, and SME2 integration. Next, you’ll dive deeper into building and benchmarking models with these technologies. + + diff --git a/content/learning-paths/mobile-graphics-and-gaming/litert-sme/2-build-tool.md b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/2-build-tool.md new file mode 100644 index 0000000000..b1fd8322ef --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/2-build-tool.md @@ -0,0 +1,141 @@ +--- +title: Build the LiteRT benchmark tool +weight: 4 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Build the LiteRT benchmark tool with KleidiAI and SME2 enabled + +LiteRT provides a standalone performance measurement utility called `benchmark_model` for evaluating the performance of LiteRT models. + +In this section, you will build two versions of the benchmark tool: +- With KleidiAI and Scalable Matrix Extension version 2 (SME2) enabled, which uses Arm-optimized micro-kernels +- Without KleidiAI and SME2, which provides baseline performance using NEON or SVE2 fallback + +This comparison demonstrates the performance gains provided by SME2 acceleration. + +First, clone the LiteRT repository: + +```bash +cd $WORKSPACE +git clone https://github.com/google-ai-edge/LiteRT.git +``` + +Because LiteRT integrates KleidiAI through XNNPACK (an open-source library providing highly optimized neural-network operators), you must build LiteRT from source to enable SME2 micro-kernels. 
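+
+Because you will build the benchmark tool twice (with and without KleidiAI), it can help to note which LiteRT commit you are building from so that both builds use the same source. This is an optional step and not part of the official instructions:
+
+```bash
+cd $WORKSPACE/LiteRT
+# Record the commit hash and reuse this same checkout for both builds
+git log -1 --oneline
+```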
+ +Next, set up your Android build environment using Docker on your Linux development machine. Google provides a Dockerfile that installs the toolchain needed for TensorFlow Lite (TFLite)/LiteRT Android builds. + +Download the Dockerfile: + +```bash +wget https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/lite/tools/tflite-android.Dockerfile +``` + +Build the Docker image: + +```bash +docker build . -t tflite-builder -f tflite-android.Dockerfile +``` + +The Docker image includes Bazel, Android Native Development Kit (NDK), CMake, toolchains, and Python required for cross-compiling Android binaries. + +Now, install Android Software Development Kit (SDK) and NDK components inside the container. + +Launch the Docker container: + +```bash +docker run -it -v $PWD:/host_dir tflite-builder bash +``` + +Install Android platform tools: + +```bash +sdkmanager \ + "build-tools;${ANDROID_BUILD_TOOLS_VERSION}" \ + "platform-tools" \ + "platforms;android-${ANDROID_API_LEVEL}" +``` + +Configure LiteRT build options inside your running container: + +```bash +cd /host_dir/LiteRT +./configure +``` + +Use default values for all prompts except when asked: + +```output +Would you like to interactively configure ./WORKSPACE for Android builds? [y/N] +``` + +Type `y` and press Enter. + +LiteRT's configuration script will detect SDK and NDK paths, set toolchain versions, configure the Android Application Binary Interface (ABI) to arm64-v8a, and initialize Bazel workspace rules. + +Now, you can build the benchmark tool with KleidiAI and SME2 enabled. + +Enable XNNPACK, quantization paths, and SME2 acceleration: + +```bash +export BENCHMARK_TOOL_PATH="litert/tools:benchmark_model" +export XNNPACK_OPTIONS="--define tflite_with_xnnpack=true \ +--define=tflite_with_xnnpack_qs8=true \ +--define=tflite_with_xnnpack_qu8=true \ +--define=tflite_with_xnnpack_dynamic_fully_connected=true \ +--define=xnn_enable_arm_sme=true \ +--define=xnn_enable_arm_sme2=true \ +--define=xnn_enable_kleidiai=true" +``` + +Build for Android: + +```bash +bazel build -c opt --config=android_arm64 \ +${XNNPACK_OPTIONS} "${BENCHMARK_TOOL_PATH}" \ +--repo_env=HERMETIC_PYTHON_VERSION=3.12 +``` + +This build enables the KleidiAI and SME2 micro-kernels integrated into XNNPACK and produces an Android binary at: + +```output +bazel-bin/litert/tools/benchmark_model +``` + +## Build the LiteRT benchmark tool without KleidiAI (baseline comparison) + +To compare the performance of the KleidiAI SME2 implementation against XNNPACK's original implementation, build another version of the LiteRT benchmark tool without KleidiAI and SME2 enabled. + +Set the build options to disable SME2 and KleidiAI: + +```bash +export BENCHMARK_TOOL_PATH="litert/tools:benchmark_model" +export XNNPACK_OPTIONS="--define tflite_with_xnnpack=true \ +--define=tflite_with_xnnpack_qs8=true \ +--define=tflite_with_xnnpack_qu8=true \ +--define=tflite_with_xnnpack_dynamic_fully_connected=true \ +--define=xnn_enable_arm_sme=false \ +--define=xnn_enable_arm_sme2=false \ +--define=xnn_enable_kleidiai=false" +``` + +Then rebuild: + +```bash +bazel build -c opt --config=android_arm64 \ +${XNNPACK_OPTIONS} "${BENCHMARK_TOOL_PATH}" \ +--repo_env=HERMETIC_PYTHON_VERSION=3.12 +``` + +This build of the `benchmark_model` disables all SME2 micro-kernels and forces fallback to XNNPACK's NEON or SVE2 kernels. 
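+
+Both builds write their output binary to the same path, so the second build overwrites the first. If you want to keep both versions for a side-by-side comparison, you could copy each output to a distinct name right after it is built. The names below are only an illustration, not part of the official tooling:
+
+```bash
+# After the KleidiAI/SME2-enabled build:
+cp bazel-bin/litert/tools/benchmark_model ./benchmark_model_sme2
+# After the baseline build:
+cp bazel-bin/litert/tools/benchmark_model ./benchmark_model_baseline
+```
+
+If you do this, push whichever copy you want to test in the `adb push` step that follows.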
+ +You can then use Android Debug Bridge (ADB) to push the benchmark tool to your Android device: + +```bash +adb push bazel-bin/litert/tools/benchmark_model /data/local/tmp/ +adb shell chmod +x /data/local/tmp/benchmark_model +``` + +You have now built both versions of the LiteRT benchmark tool. You are ready to benchmark and compare SME2-accelerated and baseline performance on your Arm-based Android device. diff --git a/content/learning-paths/mobile-graphics-and-gaming/litert-sme/3-build-model.md b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/3-build-model.md new file mode 100644 index 0000000000..4aa8b2e948 --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/3-build-model.md @@ -0,0 +1,170 @@ +--- +title: Create LiteRT models +weight: 3 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## KleidiAI SME2 support in LiteRT + +LiteRT uses XNNPACK as its default CPU backend. KleidiAI micro-kernels are integrated through XNNPACK in LiteRT. Only a subset of KleidiAI Scalable Matrix Extension (SME and SME2) micro-kernels has been integrated into XNNPACK. These micro-kernels support operators using the following data types and quantization configurations in the LiteRT model. Other operators use XNNPACK's default implementation during inference. + +### Supported operator configurations + +#### Fully Connected: + +| Activations | Weights | Output | +| ---------------------------- | --------------------------------------- | ---------------------------- | +| FP32 | FP32 | FP32 | +| FP32 | FP16 | FP32 | +| FP32 | Per-channel symmetric INT8 quantization | FP32 | +| Asymmetric INT8 quantization | Per-channel symmetric INT8 quantization | Asymmetric INT8 quantization | +| FP32 | Per-channel symmetric INT4 quantization | FP32 | + +#### Batch matrix multiply: + +| Input A | Input B | +| ------- | --------------------------------------- | +| FP32 | FP32 | +| FP16 | FP16 | +| FP32 | Per-channel symmetric INT8 quantization | + +#### Conv2D: + +| Activations | Weights | Output | +| ---------------------------- | ----------------------------------------------------- | ---------------------------- | +| FP32 | FP32, pointwise (kernel size is 1) | FP32 | +| FP32 | FP16, pointwise (kernel size is 1) | FP32 | +| FP32 | Per-channel or per-tensor symmetric INT8 quantization | FP32 | +| Asymmetric INT8 quantization | Per-channel or per-tensor symmetric INT8 quantization | Asymmetric INT8 quantization | + +#### TransposeConv: + +| Activations | Weights | Output | +| ---------------------------- | ----------------------------------------------------- | ---------------------------- | +| Asymmetric INT8 quantization | Per-channel or per-tensor symmetric INT8 quantization | Asymmetric INT8 quantization | + + +## Create LiteRT models using Keras + +To demonstrate SME2 acceleration on Android, you will construct simple single-layer models (for example, fully connected) using Keras and convert them into LiteRT (`.tflite`) format. This allows you to benchmark isolated operators and directly observe SME2 improvements. 
+ +Install the TensorFlow package dependency for your script: + +```bash +sudo pip3 install tensorflow +``` + +Save the following script as `model.py`: + +```python +import tensorflow as tf +import numpy as np +import os + +batch_size = 100 +input_size = 640 +output_size = 1280 + +def save_litert_model(model_bytes, filename): + if os.path.exists(filename): + print(f"Warning: {filename} already exists and will be overwritten.") + with open(filename, "wb") as f: + f.write(model_bytes) + +model = tf.keras.Sequential([ + tf.keras.layers.InputLayer(input_shape=(input_size,), batch_size=batch_size), + tf.keras.layers.Dense(output_size) +]) + +# Convert to FP32 model +converter = tf.lite.TFLiteConverter.from_keras_model(model) +fc_fp32 = converter.convert() +save_litert_model(fc_fp32, "fc_fp32.tflite") +``` + +Run the script: + +```bash +python3 model.py +``` + +The model `fc_fp32.tflite` is created in FP32 format. As mentioned above, this operator can invoke the KleidiAI SME2 micro-kernel for acceleration. + +You can then use Android Debug Bridge (ADB) to push the model for benchmarking to your Android device: + +```bash +adb push fc_fp32.tflite /data/local/tmp/ +adb shell chmod +x /data/local/tmp/fc_fp32.tflite +``` + +You can also optimize this Keras model using post-training quantization to create a LiteRT model that suits your requirements. + +--- + +## Post-training quantization options + +**Post-training FP16 quantization** + +```python +# Convert to model with FP16 weights and FP32 activations +converter = tf.lite.TFLiteConverter.from_keras_model(model) +converter.optimizations = [tf.lite.Optimize.DEFAULT] +converter.target_spec.supported_types = [tf.float16] +converter.target_spec._experimental_supported_accumulation_type = tf.dtypes.float16 +fc_fp16 = converter.convert() +save_litert_model(fc_fp16, "fc_fp16.tflite") +``` + +This method applies FP16 quantization to a model with FP32 operators. In practice, this optimization adds metadata to the model to indicate that the model is compatible with FP16 inference. At runtime, XNNPACK replaces the FP32 operators with their FP16 equivalents and inserts additional operators to convert the model inputs from FP32 to FP16 and outputs from FP16 back to FP32. + +KleidiAI provides FP16 packing micro-kernels for both the activations and weights matrix, as well as FP16 matrix multiplication micro-kernels. + +**Post-training INT8 dynamic range quantization** + +```python +# Convert to dynamically quantized INT8 model (INT8 weights, FP32 activations) +converter = tf.lite.TFLiteConverter.from_keras_model(model) +converter.optimizations = [tf.lite.Optimize.DEFAULT] +fc_int8_dynamic = converter.convert() +save_litert_model(fc_int8_dynamic, "fc_dynamic_int8.tflite") +``` + +This quantization method optimizes operators with large parameter sizes by quantizing their weights to INT8 while keeping the activations in the FP32 data format. + +KleidiAI provides micro-kernels that dynamically quantize activations to INT8 at runtime, as well as packing micro-kernels for the weights matrix and INT8 matrix multiplication micro-kernels that produce FP32 outputs. 
+ +**Post-training INT8 static quantization** + +```python +def fake_dataset(): + for _ in range(100): + sample = np.random.rand(input_size).astype(np.float32) + yield [sample] +# Convert to statically quantized INT8 model (INT8 weights and activations) +converter = tf.lite.TFLiteConverter.from_keras_model(model) +converter.optimizations = [tf.lite.Optimize.DEFAULT] +converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] +converter.target_spec.supported_types = [tf.int8] +converter.inference_input_type = tf.int8 +converter.inference_output_type = tf.int8 +converter.representative_dataset = fake_dataset +fc_int8_static = converter.convert() +save_litert_model(fc_int8_static, "fc_static_int8.tflite") +``` + +This quantization method quantizes both the activations and the weights to INT8. + +KleidiAI provides INT8 packing micro-kernels for both the activations and weights matrix, as well as INT8 matrix multiplication micro-kernels. + +## What you've accomplished and what's next +You have now created several LiteRT models with different quantization options, ready for benchmarking on your Arm-based Android device. You have: + +- Built a simple Keras model and converted it to LiteRT (`.tflite`) format +- Generated models with different quantization types: FP32, FP16, INT8 dynamic, and INT8 static +- Learned how each quantization method affects model size, performance, and compatibility with KleidiAI SME2 micro-kernels + +Now that you have created and converted your models, you can benchmark them on your Android device to measure the performance gains from SME2 acceleration. Consider experimenting with additional layers such as Conv2D, BatchMatMul, or TransposeConv to further explore SME2 support for different operators. You can also apply more quantization and optimization techniques to enhance model efficiency. Finally, integrate your optimized models into your Android applications to leverage Arm SME2 acceleration in real-world use cases. + +By following these steps, you can maximize the performance of your machine learning models on Arm-based devices using LiteRT and KleidiAI SME2. diff --git a/content/learning-paths/mobile-graphics-and-gaming/litert-sme/4-benchmark.md b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/4-benchmark.md new file mode 100644 index 0000000000..4290013eeb --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/4-benchmark.md @@ -0,0 +1,230 @@ +title: Benchmark the LiteRT model +weight: 5 +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + + +## Prerequisites for Benchmarking on SME2 + +Before you begin benchmarking LiteRT models on an SME2-capable Android device, make sure you have the following components prepared: + + +Once you have: +- A LiteRT model (for example, `fc_fp32.tflite`) +- The `benchmark_model` binary built with and without KleidiAI and Scalable Matrix Extension version 2 (SME2) + +you can run benchmarks directly on an SME2-capable Android device. + +## Verify SME2 support on the device + +First, check if your Android device supports SME2. +On the device (via Android Debug Bridge, ADB shell), run: + +```bash +cat /proc/cpuinfo +``` + +Look for a `Features` line similar to: + +```output +... 
+processor : 7 +BogoMIPS : 2000.00 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bf16 dgh bti mte ecv afp mte3 sme smei8i32 smef16f32 smeb16f32 smef32f32 wfxt rprfm sme2 smei16i32 smebi32i32 hbc lrcpc3 +``` + +If you see `sme2` in the features, your CPU supports SME2. + +## Run benchmark_model on an SME2 core + +Next, run the benchmark tool and bind execution to a core that supports SME2. +For example, to pin to CPU 7, use a single thread, and run enough iterations for stable timing: + +```bash +taskset 80 ./benchmark_model --graph=./fc_fp32.tflite --num_runs=1000 --num_threads=1 --use_cpu=true --use_profiler=true +``` + +This command uses `taskset` to run the benchmark on core 7, sets `--num_threads=1`, and runs 1000 inferences. The `--use_profiler=true` flag enables operator-level profiling. + +You should see output similar to: + +```output +... +INFO: [litert/runtime/accelerators/auto_registration.cc:148] CPU accelerator registered. +INFO: [litert/runtime/compiled_model.cc:415] Flatbuffer model initialized directly from incoming litert model. +INFO: Initialized TensorFlow Lite runtime. +INFO: Created TensorFlow Lite XNNPACK delegate for CPU. +VERBOSE: Replacing 1 out of 1 node(s) with delegate (TfLiteXNNPackDelegate) node, yielding 1 partitions for subgraph 0. +INFO: The input model file size (MB): 3.27774 +INFO: Initialized session in 4.478ms. +INFO: Running benchmark for at least 1 iterations and at least 0.5 seconds but terminate if exceeding 150 seconds. +INFO: count=1055 first=1033 curr=473 min=443 max=1033 avg=465.319 std=18 p5=459 median=463 p95=478 + +INFO: Running benchmark for at least 1000 iterations and at least 1 seconds but terminate if exceeding 150 seconds. +INFO: count=2112 first=463 curr=459 min=442 max=979 avg=464.545 std=13 p5=460 median=462 p95=478 + +INFO: [./litert/tools/benchmark_litert_model.h:81] +========== BENCHMARK RESULTS ========== +INFO: [./litert/tools/benchmark_litert_model.h:82] Model initialization: 4.48 ms +INFO: [./litert/tools/benchmark_litert_model.h:84] Warmup (first): 1.03 ms +INFO: [./litert/tools/benchmark_litert_model.h:86] Warmup (avg): 0.47 ms (1055 runs) +INFO: [./litert/tools/benchmark_litert_model.h:88] Inference (avg): 0.46 ms (2112 runs) +INFO: [./litert/tools/benchmark_litert_model.h:92] Inference (min): 0.44 ms +INFO: [./litert/tools/benchmark_litert_model.h:94] Inference (max): 0.98 ms +INFO: [./litert/tools/benchmark_litert_model.h:96] Inference (std): 0.01 +INFO: [./litert/tools/benchmark_litert_model.h:103] Throughput: 525.55 MB/s +INFO: [./litert/tools/benchmark_litert_model.h:112] +Memory Usage: +INFO: [./litert/tools/benchmark_litert_model.h:114] Init footprint: 8.94 MB +INFO: [./litert/tools/benchmark_litert_model.h:116] Overall footprint: 11.51 MB +INFO: [./litert/tools/benchmark_litert_model.h:123] Peak memory usage not available. 
(peak_mem_mb <= 0) +INFO: [./litert/tools/benchmark_litert_model.h:126] ====================================== + +INFO: [./litert/tools/benchmark_litert_model.h:179] +============================== Run Order ============================== + [node type] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] + LiteRT::Run[buffer registration] 0.020 0.014 3.309% 3.309% 0.000 1 LiteRT::Run[buffer registration]/0 + AllocateTensors 0.291 0.291 0.022% 3.331% 452.000 0 AllocateTensors/0 + Static Reshape (NC) 0.085 0.003 0.739% 4.070% 0.000 1 Delegate/Static Reshape (NC):0 + Fully connected (NC, PF32) GEMM 0.538 0.382 92.948% 97.018% 0.000 1 Delegate/fully connected(NC, PF32) GEMM:1 + LiteRT::Run[Buffer sync] 0.013 0.012 2.982% 100.000% 0.000 1 LiteRT::Run[Buffer sync]/0 + +============================== Top by Computation Time ============================== + [node type] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] + fully connected(NC, PF32) GEMM 0.538 0.382 92.948% 92.948% 0.000 1 Delegate/fully connected(NC, PF32) GEMM:1 + AllocateTensors 0.291 0.291 0.022% 92.970% 452.000 0 AllocateTensors/0 + LiteRT::Run[buffer registration] 0.020 0.014 3.309% 96.279% 0.000 1 LiteRT::Run[buffer registration]/0 + LiteRT::Run[Buffer sync] 0.013 0.012 2.982% 99.261% 0.000 1 LiteRT::Run[Buffer sync]/0 + Static Reshape (NC) 0.085 0.003 0.739% 100.000% 0.000 1 Delegate/Static Reshape (NC):0 + +Number of nodes executed: 5 +============================== Summary by node type ============================== + [Node type] [count] [avg ms] [avg %] [cdf %] [mem KB] [times called] + fully connected(NC, PF32) GEMM 1 0.382 93.171% 93.171% 0.000 1 + LiteRT::Run[buffer registration] 1 0.013 3.171% 96.341% 0.000 1 + LiteRT::Run[Buffer sync] 1 0.012 2.927% 99.268% 0.000 1 + Static Reshape (NC) 1 0.003 0.732% 100.000% 0.000 1 + AllocateTensors 1 0.000 0.000% 100.000% 452.000 0 + +Timings (microseconds): count=3166 first=947 curr=406 min=390 max=947 avg=411.071 std=14 +Memory (bytes): count=0 +5 nodes observed +``` + +You will see the time spent on model initialization, warm-up, and inference, as well as memory usage. With the profiler enabled, the output also reports the execution time of each operator. + +Because the model contains only a single fully connected layer, the node type `Fully Connected (NC, PF32) GEMM` shows the average execution time and its percentage of total inference time. + +{{% notice Note %}} +To verify that KleidiAI SME2 micro-kernels are invoked for the FullyConnected operator during inference, run `simpleperf record -g -- ` to capture the calling graph. If you're using the `benchmark_model`, build it with the `-c dbg` option. +{{% /notice %}} + +## Measure the performance impact of KleidiAI SME2 micro-kernels + +To compare the performance of the KleidiAI SME2 implementation with the original XNNPACK implementation, run the `benchmark_model` tool without KleidiAI enabled using the same parameters: + +```bash +taskset 80 ./benchmark_model --graph=./fc_fp32.tflite --num_runs=1000 --num_threads=1 --use_cpu=true --use_profiler=true +``` + +The output should look like: + +```output +... +INFO: [litert/runtime/accelerators/auto_registration.cc:148] CPU accelerator registered. +INFO: [litert/runtime/compiled_model.cc:415] Flatbuffer model initialized directly from incoming litert model. +INFO: Initialized TensorFlow Lite runtime. +INFO: Created TensorFlow Lite XNNPACK delegate for CPU. 
+VERBOSE: Replacing 1 out of 1 node(s) with delegate (TfLiteXNNPackDelegate) node, yielding 1 partitions for subgraph 0. +INFO: The input model file size (MB): 3.27774 +INFO: Initialized session in 4.488ms. +INFO: Running benchmark for at least 1 iterations and at least 0.5 seconds but terminate if exceeding 150 seconds. +INFO: count=358 first=1927 curr=1370 min=1363 max=1927 avg=1386.31 std=38 p5=1366 median=1377 p95=1428 + +INFO: Running benchmark for at least 1000 iterations and at least 1 seconds but terminate if exceeding 150 seconds. +INFO: count=1000 first=1407 curr=1370 min=1362 max=1452 avg=1379.64 std=14 p5=1365 median=1373 p95=1409 + +INFO: [./litert/tools/benchmark_litert_model.h:81] +========== BENCHMARK RESULTS ========== +INFO: [./litert/tools/benchmark_litert_model.h:82] Model initialization: 4.49 ms +INFO: [./litert/tools/benchmark_litert_model.h:84] Warmup (first): 1.93 ms +INFO: [./litert/tools/benchmark_litert_model.h:86] Warmup (avg): 1.39 ms (358 runs) +INFO: [./litert/tools/benchmark_litert_model.h:88] Inference (avg): 1.38 ms (1000 runs) +INFO: [./litert/tools/benchmark_litert_model.h:92] Inference (min): 1.36 ms +INFO: [./litert/tools/benchmark_litert_model.h:94] Inference (max): 1.45 ms +INFO: [./litert/tools/benchmark_litert_model.h:96] Inference (std): 0.01 +INFO: [./litert/tools/benchmark_litert_model.h:103] Throughput: 176.96 MB/s +INFO: [./litert/tools/benchmark_litert_model.h:112] +Memory Usage: +INFO: [./litert/tools/benchmark_litert_model.h:114] Init footprint: 9.07 MB +INFO: [./litert/tools/benchmark_litert_model.h:116] Overall footprint: 11.25 MB +INFO: [./litert/tools/benchmark_litert_model.h:123] Peak memory usage not available. (peak_mem_mb <= 0) +INFO: [./litert/tools/benchmark_litert_model.h:126] ====================================== + +INFO: [./litert/tools/benchmark_litert_model.h:179] +============================== Run Order ============================== + [node type] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] + LiteRT::Run[buffer registration] 0.026 0.018 1.392% 1.392% 0.000 1 LiteRT::Run[buffer registration]/0 + AllocateTensors 0.195 0.195 0.011% 1.403% 56.000 0 AllocateTensors/0 + Static Reshape (NC) 0.004 0.004 0.307% 1.710% 0.000 1 Delegate/Static Reshape (NC):0 + Fully Connected (NC, F32) GEMM 1.564 1.269 97.059% 98.769% 0.000 1 Delegate/Fully Connected (NC, F32) GEMM:1 + LiteRT::Run[Buffer sync] 0.018 0.016 1.231% 100.000% 0.000 1 LiteRT::Run[Buffer sync]/0 + +============================== Top by Computation Time ============================== + [node type] [first] [avg ms] [%] [cdf%] [mem KB] [times called] [Name] + Fully Connected (NC, F32) GEMM 1.564 1.269 97.059% 97.059% 0.000 1 Delegate/Fully Connected (NC, F32) GEMM:1 + AllocateTensors 0.195 0.195 0.011% 97.070% 56.000 0 AllocateTensors/0 + LiteRT::Run[buffer registration] 0.026 0.018 1.392% 98.462% 0.000 1 LiteRT::Run[buffer registration]/0 + LiteRT::Run[Buffer sync] 0.018 0.016 1.231% 99.693% 0.000 1 LiteRT::Run[Buffer sync]/0 + Static Reshape (NC) 0.004 0.004 0.307% 100.000% 0.000 1 Delegate/Static Reshape (NC):0 + +Number of nodes executed: 5 +============================== Summary by node type ============================== + [Node type] [count] [avg ms] [avg %] [cdf %] [mem KB] [times called] + Fully Connected (NC, F32) GEMM 1 1.268 97.090% 97.090% 0.000 1 + LiteRT::Run[buffer registration] 1 0.018 1.378% 98.469% 0.000 1 + LiteRT::Run[Buffer sync] 1 0.016 1.225% 99.694% 0.000 1 + Static Reshape (NC) 1 0.004 0.306% 100.000% 0.000 1 + AllocateTensors 1 0.000 
0.000% 100.000% 56.000 0 + +Timings (microseconds): count=1357 first=1807 curr=1295 min=1291 max=1807 avg=1307.19 std=21 +Memory (bytes): count=0 +5 nodes observed +``` + +You should notice significant throughput uplift and speedup in inference time when KleidiAI SME2 micro-kernels are enabled. + +### Interpreting node type names for KleidiAI + +For the same model, the XNNPACK node type name is different. For the non-KleidiAI implementation, the node type is `Fully Connected (NC, F32) GEMM`, whereas for the KleidiAI implementation, it is `Fully Connected (NC, PF32) GEMM`. + +For other operators supported by KleidiAI, the per-operator profiling node types differ between the implementations with and without KleidiAI enabled in XNNPACK as follows: + +| Operator | Node Type (KleidiAI Enabled) | Node Type (KleidiAI Disabled) | +|----------------------------------------|-------------------------------------------------------|--------------------------------------------------------| +| Fully Connected / Conv2D (Pointwise) | Fully Connected (NC, PF32) | Fully Connected (NC, F32) | +| Fully Connected | Dynamic Fully Connected (NC, PF32) | Dynamic Fully Connected (NC, F32) | +| Fully Connected / Conv2D (Pointwise) | Fully Connected (NC, PF16) | Fully Connected (NC, F16) | +| Fully Connected | Dynamic Fully Connected (NC, PF16) | Dynamic Fully Connected (NC, F16) | +| Fully Connected | Fully Connected (NC, QP8, F32, QC4W) | Fully Connected (NC, QD8, F32, QC4W) | +| Fully Connected / Conv2D (Pointwise) | Fully Connected (NC, QP8, F32, QC8W) | Fully Connected (NC, QD8, F32, QC8W) | +| Fully Connected / Conv2D (Pointwise) | Fully Connected (NC, PQS8, QC8W) | Fully Connected (NC, QS8, QC8W) | +| Batch Matrix Multiply | Batch Matrix Multiply (NC, PF32) | Batch Matrix Multiply (NC, F32) | +| Batch Matrix Multiply | Batch Matrix Multiply (NC, PF16) | Batch Matrix Multiply (NC, F16) | +| Batch Matrix Multiply | Batch Matrix Multiply (NC, QP8, F32, QC8W) | Batch Matrix Multiply (NC, QD8, F32, QC8W) | +| Conv2D | Convolution (NHWC, PQS8, QS8, QC8W) | Convolution (NHWC, QC8) | +| TransposeConv | Deconvolution (NHWC, PQS8, QS8, QC8W) | Deconvolution (NC, QS8, QC8W) | + +The letter “P” in the node type indicates a KleidiAI implementation. + +For example, `Convolution (NHWC, PQS8, QS8, QC8W)` represents a Conv2D operator computed by a KleidiAI micro-kernel, where the tensor is in NHWC layout, the input is packed INT8 quantized, the weights are per-channel INT8 quantized, and the output is INT8 quantized. + +By comparing `benchmark_model` runs with and without KleidiAI and SME2, and inspecting the profiled node types (PF32, PF16, QP8, PQS8), you can confirm that LiteRT is dispatching to SME2-optimized KleidiAI micro-kernels and quantify their performance impact on your Android device. + + +## What you've accomplished and what's next + +In this section, you learned how to benchmark LiteRT models on SME2-capable Android devices, verify SME2 support, and interpret performance results with and without KleidiAI SME2 micro-kernels. You also discovered how to identify which micro-kernels are used during inference by examining node type names in the profiler output. + +You are now ready to further analyze performance, experiment with different models, or explore advanced profiling and optimization techniques for on-device AI with LiteRT, XNNPACK, and KleidiAI. Continue to the next section to deepen your understanding or apply these skills to your own projects. 
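+
+As an optional extension, you can automate the comparison across the models you created earlier with a small shell loop that extracts the average inference time from each run. The binary names below assume you kept the two builds under distinct names; they are illustrative, and the exact output format of `benchmark_model` can vary between versions:
+
+```bash
+# Run on the device (for example from /data/local/tmp), pinned to an SME2-capable core
+for MODEL in fc_fp32.tflite fc_fp16.tflite fc_dynamic_int8.tflite fc_static_int8.tflite; do
+  for BIN in ./benchmark_model_sme2 ./benchmark_model_baseline; do
+    echo "== $BIN / $MODEL =="
+    taskset 80 $BIN --graph=./$MODEL --num_runs=1000 --num_threads=1 --use_cpu=true 2>&1 \
+      | grep "Inference (avg)"
+  done
+done
+```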
diff --git a/content/learning-paths/mobile-graphics-and-gaming/litert-sme/_index.md b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/_index.md new file mode 100644 index 0000000000..8fa14d9764 --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/_index.md @@ -0,0 +1,55 @@ +--- +title: Accelerate LiteRT Models on Android with KleidiAI and SME2 + +minutes_to_complete: 30 + +who_is_this_for: This is an advanced topic for developers looking to leverage Arm's Scalable Matrix Extension 2 (SME2) instructions to accelerate LiteRT model inference on Android. + +learning_objectives: +- Understand how KleidiAI integrates with LiteRT +- Build the LiteRT benchmark tool and enable XNNPACK and KleidiAI with SME2 support in LiteRT +- Create LiteRT models that can be accelerated by SME2 through KleidiAI +- Use the benchmark tool to evaluate and validate the SME2 acceleration performance of LiteRT models + +prerequisites: +- An Arm64 Linux development machine +- An Android device that supports Arm SME2 architecture features - see this [list of devices with SME2 support](/learning-paths/cross-platform/multiplying-matrices-with-sme2/1-get-started/#devices) + +author: Jiaming Guo + +### Tags +skilllevels: Advanced +subjects: ML +armips: + - Cortex-A + - Cortex-X +tools_software_languages: + - C + - Python +operatingsystems: + - Android + + + +further_reading: + - resource: + title: LiteRT model optimization + link: https://ai.google.dev/edge/litert/models/model_optimization + type: website + - resource: + title: Convert Pytorch model to LiteRT model + link: https://ai.google.dev/edge/litert/models/pytorch_to_tflite + type: website + - resource: + title: LiteRT repository + link: https://github.com/google-ai-edge/LiteRT?tab=readme-ov-file#1--i-have-a-pytorch-model + type: website + + + +### FIXED, DO NOT MODIFY +# ================================================================================ +weight: 1 # _index.md always has weight of 1 to order correctly +layout: "learningpathall" # All files under learning paths have this same wrapper +learning_path_main_page: "yes" # This should be surfaced when looking for related content. Only set for _index.md of learning path content. +--- diff --git a/content/learning-paths/mobile-graphics-and-gaming/litert-sme/_next-steps.md b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/_next-steps.md new file mode 100644 index 0000000000..c3db0de5a2 --- /dev/null +++ b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/_next-steps.md @@ -0,0 +1,8 @@ +--- +# ================================================================================ +# FIXED, DO NOT MODIFY THIS FILE +# ================================================================================ +weight: 21 # Set to always be larger than the content in this path to be at the end of the navigation. +title: "Next Steps" # Always the same, html page title. +layout: "learningpathall" # All files under learning paths have this same wrapper for Hugo processing. 
+--- diff --git a/content/learning-paths/mobile-graphics-and-gaming/litert-sme/litert-sw-stack.png b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/litert-sw-stack.png new file mode 100644 index 0000000000..eb4c2b02cd Binary files /dev/null and b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/litert-sw-stack.png differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/litert-sme/litert-xnnpack-kleidiai-workflow.png b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/litert-xnnpack-kleidiai-workflow.png new file mode 100644 index 0000000000..0820a5b72d Binary files /dev/null and b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/litert-xnnpack-kleidiai-workflow.png differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/litert-sme/litert-xnnpack-workflow.png b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/litert-xnnpack-workflow.png new file mode 100644 index 0000000000..35fabf3c43 Binary files /dev/null and b/content/learning-paths/mobile-graphics-and-gaming/litert-sme/litert-xnnpack-workflow.png differ diff --git a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/2-emulation-layer.md b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/2-emulation-layer.md index 731c7886af..4d9b825d88 100644 --- a/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/2-emulation-layer.md +++ b/content/learning-paths/mobile-graphics-and-gaming/nss-unreal/2-emulation-layer.md @@ -71,9 +71,17 @@ Open your project directory in Windows explorer, and create a new folder called Enabling the plugin will look slightly different depending on what Unreal version you are using. Follow the steps corresponding to your setup. +## For Unreal 5.4 and 4.27 + +1. Copy the downloaded and extracted `.zip` archive into the new `Plugins` directory: + - `UE5.4` or `UE4.27` +2. Re-open Unreal Engine. When prompted, confirm plugin integration. +3. Rebuild your project in Visual Studio from source. +4. Verify the installation by opening the Plugins view in Unreal Engine, and making sure the checkbox is selected for `NSS`. Restart Unreal Engine if prompted. + ## For Unreal 5.5 -1. Copy the downloaded and extracted `.zip` into the new `Plugins` directory: +1. Copy the downloaded and extracted `.zip` archives into the new `Plugins` directory: - `UE5.5` - `NNERuntimeRDGMLExtensionsForVulkan` 2. Re-open Unreal Engine. When prompted, confirm plugin integration. @@ -82,13 +90,4 @@ Enabling the plugin will look slightly different depending on what Unreal versio ![Unreal Engine plugins window showing NSS and NNERuntimeRDGMLExtensionsForVulkan enabled#center](./images/verify_plugin_enabled.png "Figure 5: Verify plugin installation in Unreal Engine.") - -## For Unreal 5.4 - -1. Copy the downloaded and extracted `.zip` into the new `Plugins` directory: - - `UE5.4` -2. Re-open Unreal Engine. When prompted, confirm plugin integration. -3. Rebuild your project in Visual Studio from source. -4. Verify the installation by opening the Plugins view in Unreal Engine, and making sure the checkbox is selected for `NSS`. Restart Unreal Engine if prompted. - With the emulation layers and plugins configured, you're ready to run Neural Super Sampling in Unreal Engine. Continue to the next section to test the integration. 
diff --git a/content/learning-paths/servers-and-cloud-computing/_index.md b/content/learning-paths/servers-and-cloud-computing/_index.md index b3566a1353..c60510d465 100644 --- a/content/learning-paths/servers-and-cloud-computing/_index.md +++ b/content/learning-paths/servers-and-cloud-computing/_index.md @@ -8,7 +8,7 @@ key_ip: maintopic: true operatingsystems_filter: - Android: 3 -- Linux: 200 +- Linux: 204 - macOS: 13 - Windows: 14 pinned_modules: @@ -19,10 +19,10 @@ pinned_modules: - migration subjects_filter: - CI-CD: 11 -- Containers and Virtualization: 35 -- Databases: 22 +- Containers and Virtualization: 36 +- Databases: 24 - Libraries: 9 -- ML: 34 +- ML: 35 - Performance and Architecture: 74 - Storage: 2 - Web: 17 @@ -66,16 +66,15 @@ tools_software_languages_filter: - Azure: 1 - Azure CLI: 2 - Azure Portal: 1 -- Bash: 1 +- Bash: 3 - bash: 2 -- Bash/Shell scripting: 1 - Bastion: 3 - BOLT: 2 - bpftool: 1 - Buildkite: 1 - C: 10 - C#: 2 -- C++: 12 +- C++: 13 - Capstone: 1 - Cargo: 1 - cassandra-stress: 1 @@ -84,7 +83,8 @@ tools_software_languages_filter: - Clair: 1 - Clang: 13 - ClickBench: 1 -- ClickHouse: 1 +- ClickHouse: 2 +- clickhouse-benchmark: 1 - Cloud Build: 1 - CMake: 1 - conda: 1 @@ -97,6 +97,7 @@ tools_software_languages_filter: - Docker: 26 - Docker Buildx: 1 - Envoy: 3 +- Erlang: 1 - ExecuTorch: 1 - Facter: 1 - FAISS: 1 @@ -125,7 +126,7 @@ tools_software_languages_filter: - Google Test: 1 - Gunicorn: 1 - HammerDB: 1 -- Helm: 2 +- Helm: 3 - Herd7: 1 - Hiera: 1 - Hugging Face: 12 @@ -140,14 +141,14 @@ tools_software_languages_filter: - KEDA: 1 - Kedify: 1 - Keras: 2 -- KinD: 1 +- KinD: 2 - kube-bench: 1 -- Kubernetes: 14 +- Kubernetes: 15 - Libamath: 1 - libbpf: 1 - Linaro Forge: 1 - Litmus7: 1 -- llama.cpp: 1 +- llama.cpp: 2 - Llama.cpp: 2 - LLM: 11 - llvm-mca: 1 @@ -183,12 +184,14 @@ tools_software_languages_filter: - Perf: 1 - PHP: 1 - PHPBench: 1 +- pika: 1 - PostgreSQL: 5 - Profiling: 1 - Puppet: 1 -- Python: 35 +- Python: 37 - PyTorch: 10 - QEMU: 1 +- RabbitMQ: 1 - RAG: 1 - Rails: 1 - Redis: 4 @@ -238,7 +241,7 @@ tools_software_languages_filter: weight: 1 cloud_service_providers_filter: - AWS: 19 -- Google Cloud: 34 +- Google Cloud: 37 - Microsoft Azure: 19 - Oracle: 2 --- diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-gcp/images/gcp-shell.png b/content/learning-paths/servers-and-cloud-computing/circleci-gcp/images/gcp-shell.png new file mode 100644 index 0000000000..7e2fc3d1b5 Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/circleci-gcp/images/gcp-shell.png differ diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-gcp/images/gcp-ssh.png b/content/learning-paths/servers-and-cloud-computing/circleci-gcp/images/gcp-ssh.png new file mode 100644 index 0000000000..597ccd7fea Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/circleci-gcp/images/gcp-ssh.png differ diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-gcp/install-circleci-cli.md b/content/learning-paths/servers-and-cloud-computing/circleci-gcp/install-circleci-cli.md index 0e236d75c2..2aec2334bf 100644 --- a/content/learning-paths/servers-and-cloud-computing/circleci-gcp/install-circleci-cli.md +++ b/content/learning-paths/servers-and-cloud-computing/circleci-gcp/install-circleci-cli.md @@ -26,6 +26,8 @@ Next, refresh your package list so zypper recognizes the newest packages and dep sudo zypper refresh ``` +You may be prompted to trust or reject a key... 
if so please press "t" to accept/trust the key. + Now your system is ready to install the required tools for the CircleCI CLI. ## Install Git and required tools diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-gcp/instance.md b/content/learning-paths/servers-and-cloud-computing/circleci-gcp/instance.md index 739f4f88a5..6daa488285 100644 --- a/content/learning-paths/servers-and-cloud-computing/circleci-gcp/instance.md +++ b/content/learning-paths/servers-and-cloud-computing/circleci-gcp/instance.md @@ -26,6 +26,18 @@ To create a virtual machine based on the C4A instance type: ![Create a Google Axion C4A Arm virtual machine in the Google Cloud Console with c4a-standard-4 selected alt-text#center](images/gcp-vm.png "Creating a Google Axion C4A Arm virtual machine in Google Cloud Console") -- Under **OS and Storage**, select **Change**, then select an Arm64-based OS image. For this Learning Path, use **SUSE Linux Enterprise Server**. Select the preferred version for your operating system. Ensure you select the **Arm image** variant. Click **Select**. + +- Under **OS and Storage**, select **Change**, then choose an Arm64-based OS image. For this Learning Path, use **SUSE Linux Enterprise Server**. +- If using use **SUSE Linux Enterprise Server**. Select "Pay As You Go" for the license type. +- Once appropriately selected and configured, please Click **Select**. - Under **Networking**, enable **Allow HTTP traffic**. - Click **Create** to launch the instance. +- Once created, you should see a "SSH" option to the right in your list of VM instances. Click on this to launch a SSH shell into your VM instance: + +![Invoke a SSH session via your browser alt-text#center](images/gcp-ssh.png "Invoke a SSH session into your running VM instance") + +- A window from your browser should come up and you should now see a shell into your VM instance: + +![Terminal Shell in your VM instance alt-text#center](images/gcp-shell.png "Terminal shell in your VM instance") + +Next, let's install CircleCI! \ No newline at end of file diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/_index.md b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/_index.md index 1b1dc2aceb..43baf3f84d 100644 --- a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/_index.md +++ b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/_index.md @@ -1,30 +1,21 @@ --- -title: Deploy CircleCI Arm Native Workflows on AWS EC2 Graviton2 +title: Deploy CircleCI Arm Native Workflows on AWS EC2 Graviton -minutes_to_complete: 45 +minutes_to_complete: 30 -who_is_this_for: This is an introductory topic for developers and DevOps engineers who want to set up and run CircleCI Arm native workflows on Linux Arm64 virtual machines. You'll use AWS EC2 Graviton2 instances (Neoverse N1) and self-hosted runners. +who_is_this_for: This is an introductory topic for developers and DevOps engineers who want to set up and run CircleCI Arm native workflows on Linux Arm64 virtual machines. You'll use AWS EC2 Graviton instances (Neoverse N1) and self-hosted runners. 
learning_objectives: - - Provision an AWS EC2 Graviton2 Arm64 virtual machine - - Install and configure a CircleCI self-hosted machine runners on Arm64 + - Create an AWS EC2 Graviton Arm64 virtual machine + - Install and configure CircleCI self-hosted machine runners on Arm64 - Verify the runner by running a simple workflow and test computation - - Define and execute CircleCI job using a machine executor - - Check CPU architecture and execute a basic script to confirm if the runner is operational - - Display CPU information and validate outputs from the sample computation prerequisites: - An [AWS account](https://aws.amazon.com/free/) with billing enabled - - A [CircleCI account](https://circleci.com/) - - Basic familiarity with Linux command line - - Basic understanding of CircleCI concepts such as - [workflows](https://circleci.com/docs/guides/orchestrate/workflows/), - [jobs](https://circleci.com/docs/guides/orchestrate/jobs-steps/), - [resource classes](https://circleci.com/docs/guides/execution-managed/resource-class-overview/), and - [runners](https://circleci.com/docs/guides/execution-runner/runner-overview/) + - A CircleCI account + - Basic understanding of CircleCI workflows, jobs and resource classes - -author: Pareena Verma +author: Annie Tallund ##### Tags skilllevels: Introductory @@ -36,7 +27,7 @@ armips: tools_software_languages: - CircleCI - - Bash/Shell scripting + - Bash - Git @@ -48,8 +39,8 @@ operatingsystems: # ================================================================================ further_reading: - resource: - title: AWS EC2 Documentation - link: https://docs.aws.amazon.com/ec2/index.html + title: AWS EC2 Graviton Documentation + link: https://aws.amazon.com/ec2/graviton/ type: documentation - resource: diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/background.md b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/background.md index a731b786f6..fbf7e30556 100644 --- a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/background.md +++ b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/background.md @@ -1,18 +1,16 @@ --- -title: Get Started with CircleCI on AWS Graviton2 +title: Get Started with CircleCI on AWS Graviton weight: 2 layout: "learningpathall" --- -## AWS Graviton2 Arm instances on Amazon EC2 +## AWS Graviton Arm instances on Amazon EC2 -AWS Graviton2 is a family of Arm-based processors designed by AWS and built on Arm Neoverse-N1 cores. These instances deliver exceptional price-to-performance efficiency, making them ideal for compute-intensive workloads such as CI/CD pipelines, microservices, containerized applications, and data processing tasks. +AWS Graviton is a family of Arm-based processors designed by AWS and built on Arm Neoverse-N1 cores. These instances deliver price-to-performance efficiency, making them suitable for compute-intensive workloads such as CI/CD pipelines, microservices, containerized applications, and data processing tasks. -Graviton2-powered EC2 instances provide high performance and energy efficiency compared to traditional x86-based instances while maintaining compatibility with popular Linux distributions and open-source software stacks. - -To learn more about AWS Graviton processors, see the [AWS Graviton2 Processor Overview](https://aws.amazon.com/ec2/graviton/). 
+Graviton-powered EC2 instances provide high performance and energy efficiency while maintaining compatibility with popular Linux distributions and open-source software stacks. ## CircleCI @@ -22,7 +20,7 @@ It seamlessly integrates with popular version control systems such as GitHub, Bi CircleCI supports multiple execution environments, including Docker, Linux, macOS, and Windows, while providing advanced capabilities like parallel job execution, build caching, and matrix builds for optimized performance. -It is widely adopted by development teams to accelerate build cycles, enforce code quality, automate testing, and streamline application delivery. +It is widely adopted by development teams to accelerate build cycles, enforce code quality, automate testing, and streamline application delivery. -To learn more, visit the [CircleCI website](https://circleci.com/) and the [CircleCI documentation](https://circleci.com/docs/). +Continue to the next section to set up your Graviton instance that you will use to run CircleCI workloads. diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/circleci-runner-installation.md b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/circleci-runner-installation.md index a84e0dc5ac..ccc94a66a9 100644 --- a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/circleci-runner-installation.md +++ b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/circleci-runner-installation.md @@ -1,35 +1,27 @@ --- -title: Install CircleCI machine runner on AWS Graviton2 +title: Install CircleCI machine runner on AWS Graviton weight: 6 ### FIXED, DO NOT MODIFY layout: learningpathall --- -## Install CircleCI machine runner on AWS Graviton2 +## Install CircleCI machine runner on AWS Graviton -This Learning Path shows you how to install and configure the CircleCI Machine Runner on an AWS Graviton2 (Neoverse N1) instance. With this setup, your self-hosted Arm64 environment can efficiently execute CircleCI jobs directly on the Graviton2 architecture, enabling faster builds and improved performance for Arm-based workloads. +This section provides step-by-step instructions to install and configure the CircleCI Machine Runner. With this setup, your self-hosted Arm64 environment can efficiently execute CircleCI jobs directly on the Graviton architecture, enabling faster builds and improved performance for ARM-based workloads. -## Add the CircleCI package repository -For Debian/Ubuntu-based systems running on AWS Graviton2 (Arm64), first add the official CircleCI repository. This ensures you can install the CircleCI Runner package directly using `apt`: +### Add the CircleCI package repository +For Debian/Ubuntu-based systems running on AWS Graviton (Arm64), first add the official CircleCI repository. This ensures you can install the CircleCI Runner package directly using `apt`. ```console curl -s https://packagecloud.io/install/repositories/circleci/runner/script.deb.sh?any=true | sudo bash ``` -- The `curl` command downloads and executes the repository setup script from CircleCI's official package server. -- It configures the repository on your system, allowing `apt` to fetch and install the CircleCI runner package. -- After successful execution, the CircleCI repository will be added under `/etc/apt/sources.list.d/`. +After successful execution, the CircleCI repository will be added under `/etc/apt/sources.list.d/`. 
Run the command to verify: -## Configure the runner token -- Each self-hosted runner requires a unique authentication token generated from your resource class in the CircleCI dashboard. -- Copy the token from the CircleCI web interface. -- Export the token as an environment variable and update the runner configuration file as shown: - -```console -export RUNNER_AUTH_TOKEN="YOUR_AUTH_TOKEN" -sudo sed -i "s/<< AUTH_TOKEN >>/$RUNNER_AUTH_TOKEN/g" /etc/circleci-runner/circleci-runner-config.yaml +```bash +ls /etc/apt/sources.list.d/ ``` ## Install the CircleCI runner @@ -87,4 +79,4 @@ This confirms that the CircleCI Runner is actively connected to your CircleCI ac You can also verify it from the dashboard: -![Diagram showing the CircleCI self-hosted runner architecture. The main subject is an AWS Graviton2 server labeled as a self-hosted runner, connected to the CircleCI cloud platform. Arrows indicate job requests flowing from CircleCI to the runner and job results returning to CircleCI. The environment includes icons for cloud infrastructure and developer workstations. The tone is technical and informative. Any visible text in the image is transcribed as: Self-Hosted Runners. alt-text#center](images/runner.png "Self-Hosted Runners ") +![Diagram showing the CircleCI self-hosted runner architecture. The main subject is an AWS Graviton server labeled as a self-hosted runner, connected to the CircleCI cloud platform. Arrows indicate job requests flowing from CircleCI to the runner and job results returning to CircleCI. The environment includes icons for cloud infrastructure and developer workstations. The tone is technical and informative. Any visible text in the image is transcribed as: Self-Hosted Runners. alt-text#center](images/runner.png "Self-Hosted Runners ") diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/circlecli-installation.md b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/circlecli-installation.md index 755ed4e4a7..7144edd872 100644 --- a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/circlecli-installation.md +++ b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/circlecli-installation.md @@ -6,9 +6,8 @@ weight: 4 layout: learningpathall --- -## Install CircleCI CLI on AWS Graviton2 (Neoverse N1) instance -This section walks you through how to install the CircleCI command line interface (CLI) on an AWS Graviton2 (Neoverse N1) Arm64 EC2 instance. -With the CLI, you can work with CircleCI from your terminal to check configuration files, manage pipelines, and run self-hosted runners on your EC2 instance. +## Install CircleCI CLI on your instance +This guide walks you through how to install the CircleCI command line interface (CLI). With the CLI, you can work with CircleCI from your terminal to check configuration files, manage pipelines, and run self-hosted runners on your EC2 instance. ## Install the required packages Before installing the CircleCI CLI, ensure your system has the necessary tools for downloading and extracting files: @@ -26,7 +25,7 @@ sudo mv circleci-cli_0.1.33494_linux_arm64/circleci /usr/local/bin/ ``` - The `curl` command fetches the official CircleCI CLI archive from GitHub. - The `| tar xz` command extracts the compressed binary in a single step. -- After extraction, a new folder named **`circleci-cli_0.1.33494_linux_arm64`** appears in your current directory. +- After extraction, a new folder named `circleci-cli_0.1.33494_linux_arm64` appears in your current directory. 
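+
+Optionally, you can confirm that the extracted binary is an Arm64 build before moving on. This is a quick sanity check; it assumes the binary was moved to `/usr/local/bin/circleci` as shown above and that the `file` utility is installed:
+
+```console
+file /usr/local/bin/circleci
+```
+
+On a Graviton instance the output should mention `aarch64`.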
## Verify the installation @@ -35,10 +34,10 @@ To ensure that the CLI is installed successfully, check its version: ```console circleci version ``` -You should see an output similar to: +The first time this runs, an interactive shell might open. For now, press `Ctrl+C` to abort. You should now see an output similar to: ```output 0.1.33494+7cc6570 (release) ``` -If this version number appears, the CircleCI CLI installation on your AWS Graviton2 instance was successful! +If this version number appears, the CircleCI CLI installation on your AWS Graviton instance was successful! diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/images/shrunner0.png b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/images/shrunner0.png deleted file mode 100644 index 927a193982..0000000000 Binary files a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/images/shrunner0.png and /dev/null differ diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/images/shrunner1.png b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/images/shrunner1.png deleted file mode 100644 index bb587d2723..0000000000 Binary files a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/images/shrunner1.png and /dev/null differ diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/instance.md b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/instance.md index f715182c8c..d50444a7f8 100644 --- a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/instance.md +++ b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/instance.md @@ -1,5 +1,5 @@ --- -title: Create an AWS EC2 Arm64 Graviton2 Instance +title: Create an AWS EC2 Arm64 Graviton Instance weight: 3 ### FIXED, DO NOT MODIFY @@ -8,30 +8,37 @@ layout: learningpathall ## Overview -In this section, you'll learn how to provision an AWS Graviton2 Arm64 EC2 instance on Amazon Web Services (AWS) using the m6g.xlarge instance type (2 vCPUs, 8 GB memory) in the AWS Management Console. +In this section, you will learn how to set up an AWS Graviton Arm64 EC2 instance on Amazon Web Services (AWS) using the `m6g.large` instance type in the AWS Management Console. {{% notice Note %}} For support on AWS setup, see the Learning Path [Getting started with AWS](/learning-paths/servers-and-cloud-computing/csp/aws/). {{% /notice %}} -## Provision the instance in the AWS Management Console -To create a virtual machine based on the AWS Graviton2 Instance type, follow these steps: +## Set up an instance in the AWS Management Console + +To create your virtual machine: - Navigate to the [AWS Management Console](https://aws.amazon.com/console/). -- Go to **EC2 > Instances** and select **Launch Instance**. -- Under **Instance configuration**: - - Enter an appropriate **Instance name**. - - Choose an **Amazon Machine Image (AMI)** such as **Ubuntu 24.04 ARM64**. +- Go to EC2 > Instances and select Launch Instance. +- Under Instance configuration: + - Enter an appropriate Instance name. + - Choose an Amazon Machine Image (AMI) such as Ubuntu 24.04 ARM64. - ![AWS Management Console showing the Amazon Machine Image selection screen with Ubuntu 24.04 ARM64 highlighted. The interface displays a list of available AMIs, each with details such as name, architecture, and description. The wider environment includes navigation menus on the left and a search bar at the top. 
The mood is neutral and instructional, focused on guiding users through selecting an appropriate AMI. Visible text includes Amazon Machine Image, Ubuntu 24.04 ARM64, and related AMI details. alt-text#center](images/aws1.png "Amazon Machine Image (AMI)") - - Under **Instance type**, select a Graviton2-based type `m6g.xlarge`. + ![AWS Management Console showing the Amazon Machine Image selection screen with Ubuntu 24.04 ARM64 highlighted. The interface displays a list of available AMIs, each with details such as name, architecture, and description. The wider environment includes navigation menus on the left and a search bar at the top. The mood is neutral and instructional, focused on guiding users through selecting an appropriate AMI. Visible text includes Amazon Machine Image, Ubuntu 24.04 ARM64, and related AMI details. alt-text#center](images/aws1.png "Figure 1: Amazon Machine Image (AMI)") + + - Under Instance type, select a Graviton-based type `m6g.large`. ![AWS Management Console displaying the instance type selection screen with m6g.xlarge highlighted. The primary subject is the list of available EC2 instance types, each showing details such as name, vCPUs, memory, and architecture. The m6g.xlarge row is selected, indicating 2 vCPUs and 8 GB memory, with Arm64 architecture. The wider environment includes navigation menus on the left and a search bar at the top. Visible text includes Instance type, m6g.xlarge, vCPUs, Memory, and Arm64. The tone is neutral and instructional, guiding users to select the correct instance type. #alt-text#center](images/aws2.png "Instance type") - - Configure your **Key pair (login)** by either creating a new key pair or selecting an existing one to securely access your instance. - - In **Network settings**, ensure that **Allow HTTP traffic from the internet** and **Allow HTTPS traffic from the internet** are checked. + - Configure your Key pair (login) by either creating a new key pair or selecting an existing one to securely access your instance. + - In Network settings, ensure that Allow HTTP traffic from the internet and Allow HTTPS traffic from the internet are checked. + + ![AWS Management Console showing the Network settings configuration screen for launching an EC2 instance. The primary subject is the Network settings panel, where the options Allow HTTP traffic from the internet and Allow HTTPS traffic from the internet are both checked. The wider environment includes navigation menus on the left and a summary of instance configuration steps at the top. Visible text includes Network settings, Allow HTTP traffic from the internet, and Allow HTTPS traffic from the internet. The tone is neutral and instructional, guiding users to enable the correct network access for their instance. #alt-text#center](images/aws3.png "Figure 3: Network settings") - ![AWS Management Console showing the Network settings configuration screen for launching an EC2 instance. The primary subject is the Network settings panel, where the options Allow HTTP traffic from the internet and Allow HTTPS traffic from the internet are both checked. The wider environment includes navigation menus on the left and a summary of instance configuration steps at the top. Visible text includes Network settings, Allow HTTP traffic from the internet, and Allow HTTPS traffic from the internet. The tone is neutral and instructional, guiding users to enable the correct network access for their instance. 
#alt-text#center](images/aws3.png "Network settings")
 
+ {{% notice Network security %}}
+ Be careful with permissive network inbound rules, as they pose a security risk. It is good practice to configure the machine to allow traffic only from your IP address.
+ {{% /notice %}}
 
-  - Adjust the Storage settings. For most use cases, 30 GB of gp3 (SSD) storage is enough.
-  - Select **Launch Instance** to create your EC2 virtual machine.
+
+  - Adjust Storage settings as needed; for this setup, 30 GB of gp3 (SSD) storage is sufficient.
+  - Click Launch Instance to create your virtual machine.
diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/resource-class.md b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/resource-class.md
index ecd3df14e5..6d559fba63 100644
--- a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/resource-class.md
+++ b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/resource-class.md
@@ -10,20 +10,18 @@ layout: learningpathall
 
 This section describes creating a resource class in the CircleCI Web Dashboard for a self-hosted runner.
 
 A resource class uniquely identifies the runner and links it to your CircleCI namespace, enabling jobs to run on your custom machine environment.
 
 ## Register a resource class for your CircleCI self-hosted runner
+If you don't have an organization set up already, start by creating one to access the CircleCI dashboard.
 To register a resource class for your CircleCI self-hosted runner, start by navigating to **Self-Hosted Runners** in the left sidebar of the CircleCI dashboard. You’ll be prompted to accept the terms of use; check the box labeled “Yes, I agree to the terms” to enable runners. Once you’ve agreed, select **Self-Hosted Runners** to continue with the setup process.
 
-![CircleCI dashboard showing the Self-Hosted Runners section. The main subject is the Self-Hosted Runners setup screen with a checkbox labeled Yes I agree to the terms and a button to enable runners. The wider environment includes the CircleCI dashboard interface with navigation sidebar and setup instructions. Visible text: Self-Hosted Runners, Yes I agree to the terms. The emotional tone is neutral and instructional. alt-text#center](images/shrunner0.png "Self-Hosted Runners section")
-
 To create a new resource class, select **Create Resource Class**.
 
-![CircleCI dashboard showing the Create Resource Class button. The main subject is the Self-Hosted Runners setup screen with a prominent button labeled Create Resource Class. The wider environment includes the CircleCI dashboard interface with navigation sidebar and setup instructions. Visible text: Create Resource Class. The emotional tone is neutral and instructional. alt-text#center](images/shrunner1.png "Create Resource Class")
-
 Fill in the details for your new resource class by entering your CircleCI username or organization in the **Namespace** field (for example, `circleci`). In the **Resource Class Name** field, provide a descriptive name for your runner, such as `arm64`, to clearly identify its purpose or architecture.
 
-![CircleCI dashboard showing the form to create a resource class. The main subject is the Details section with fields for Namespace and resource class Name. The wider environment includes the CircleCI dashboard interface with navigation sidebar and setup instructions.
Visible text: Namespace, resource class Name, Create resource class. The emotional tone is neutral and instructional. alt-text#center](images/shrunner2.png "Create a resource class") - After creation, CircleCI generates a **Resource Class Token**. Copy this token and store it securely - you need it to register your runner on the AWS Arm VM. ![CircleCI dashboard showing resource Class Token field and copy button. The main subject is the resource Class Token displayed in a text box, with a button labeled Copy next to it. The wider environment includes the CircleCI dashboard interface with navigation sidebar and setup instructions. The emotional tone is neutral and instructional. Visible text: resource class Token, Copy. alt-text#center](images/shrunner3.png "Resource Class Token field and copy button") diff --git a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/validation.md b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/validation.md index ed4a4d321e..5ac82fe311 100644 --- a/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/validation.md +++ b/content/learning-paths/servers-and-cloud-computing/circleci-on-aws/validation.md @@ -14,7 +14,7 @@ This section walks you through validating your self-hosted CircleCI runner on an Start by creating a GitHub repository dedicated to verifying your Arm64 runner: ```console -git clone https://github.com//aws-circleci/ cd aws-circleci ``` This repository serves as a sandbox to confirm that your CircleCI runner can pick up and run jobs for Arm64 workflows. @@ -34,8 +34,14 @@ echo 'echo "Hello from CircleCI Arm64 Runner!"' > hello.sh chmod +x hello.sh ``` -## Define the CircleCI configuration -Now create a `.circleci/config.yml` file to define the workflow that runs on your Arm64 runner: +### Define the CircleCI Configuration +The next step is to add a `.circleci/config.yml` file to define the workflow that will run on your Arm64 runner. Start by creating the directory in your new repository. + +```bash +mkdir .circleci +``` + +Then enter the YAML content: ```yaml version: 2.1 @@ -44,7 +50,7 @@ jobs: test-Arm64: machine: enabled: true - resource_class: your-namespace/Arm64-linux # Replace with your actual resource class + resource_class: circleci/arm64 # Replace with your actual resource class steps: - checkout - run: @@ -66,45 +72,35 @@ workflows: jobs: - test-Arm64 ``` -This configuration does the following: -- Defines a single job called `test-Arm64` that uses a machine executor on your self-hosted Arm64 runner -- Verifies the runner's architecture by running `uname -m` and checking the output of `lscpu` -- Runs the `hello.sh` script to confirm the runner can execute commands -- Performs a sample computation step that displays CPU information and prints a success message +{{% notice Resource Class %}} +In the snippet above, you need to replace the `resource_class` variable with the name you defined in the previous section. +{{% /notice %}} -Each step helps you confirm that your CircleCI Arm64 runner is set up correctly and ready to process jobs. -## Commit and push to GitHub -After you create `hello.sh` and `.circleci/config.yml`, push your project to GitHub so CircleCI can build and verify your Arm64 runner: +This snippet: +- Defines a single job `test-Arm64` using a machine executor on a self-hosted Arm64 runner. +- Checks CPU architecture with `uname -m` and `lscpu` to verify the runner. +- Executes a simple script `hello.sh` to confirm the runner can run commands. 
+- Runs a sample computation step to display CPU info and print a success message.
-```console
+### Commit and Push to GitHub
+When the files you created (`hello.sh`, `.circleci/config.yml`) are ready, push your project to GitHub so CircleCI can build and verify the Arm64 runner automatically.
+
+
+```bash
 git add .
 git commit -m "Initial CircleCI Arm64 test"
-git branch -M main
-git push -u origin main
 ```
-Here's what each command does:
-- git add . — stages all your files for commit
-- git commit -m ... — saves your changes with a message
-- git branch -M main — sets your branch to main (if it's not already)
-- git push -u origin main — pushes your code to GitHub
-
-Once your code is on GitHub, CircleCI can start running your workflow automatically.
-## Start the CircleCI runner and run your job
-
-Before you test your workflow, make sure your CircleCI runner is enabled and running. This lets your self-hosted runner pick up jobs from CircleCI:
+If you haven't already, you need to configure your GitHub credentials before pushing. Once that is done, run the following to push your changes upstream:
 
-```console
-sudo systemctl enable circleci-runner
-sudo systemctl start circleci-runner
-sudo systemctl status circleci-runner
+```bash
+git branch -M main
+git push -u origin main
 ```
-- Enable the runner so it starts automatically when your machine boots
-- Start the runner and check its status to confirm it is running
 
-After you push your code to GitHub, go to your CircleCI Dashboard and select Projects. Look for your test-Arm64 workflow and check that it is running on your self-hosted runner.
+After pushing your code to GitHub, open the CircleCI dashboard, go to **Projects**, and confirm that your test-Arm64 workflow starts running on your self-hosted runner. If everything is set up correctly, you’ll see your job running under the resource class you created.
diff --git a/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/_index.md b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/_index.md
new file mode 100644
index 0000000000..43035c8ba2
--- /dev/null
+++ b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/_index.md
@@ -0,0 +1,60 @@
+---
+title: Deploy ClickHouse on Google Cloud C4A (Arm-based Axion VMs)
+
+draft: true
+cascade:
+  draft: true
+
+minutes_to_complete: 30
+
+who_is_this_for: This learning path is intended for software developers deploying and optimizing ClickHouse on Linux/Arm64 environments, specifically using Google Cloud C4A virtual machines powered by Axion processors.
+ +learning_objectives: + - Provision an Arm-based SUSE SLES virtual machine on Google Cloud (C4A with Axion processors) + - Install ClickHouse on a SUSE Arm64 (C4A) instance + - Verify ClickHouse functionality by starting the server, connecting via client, and performing baseline data insertion and simple query tests on the Arm64 VM + - Measure ClickHouse query performance (read, aggregation, and concurrent workloads) to evaluate throughput and latency on Arm64 (Aarch64) + +prerequisites: + - A [Google Cloud Platform (GCP)](https://cloud.google.com/free) account with billing enabled + - Basic familiarity with [ClickHouse](https://clickhouse.com/) +author: Pareena Verma + +##### Tags +skilllevels: Introductory +subjects: Databases +cloud_service_providers: Google Cloud + +armips: + - Neoverse + +tools_software_languages: + - ClickHouse + - clickhouse-benchmark + +operatingsystems: + - Linux + +# ================================================================================ +# FIXED, DO NOT MODIFY +# ================================================================================ +further_reading: + - resource: + title: Google Cloud documentation + link: https://cloud.google.com/docs + type: documentation + + - resource: + title: ClickHouse documentation + link: https://clickhouse.com/docs/ + type: documentation + + - resource: + title: ClickHouse benchmark documentation + link: https://clickhouse.com/docs/operations/utilities/clickhouse-benchmark + type: documentation + +weight: 1 +layout: "learningpathall" +learning_path_main_page: "yes" +--- diff --git a/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/_next-steps.md b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/_next-steps.md new file mode 100644 index 0000000000..c3db0de5a2 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/_next-steps.md @@ -0,0 +1,8 @@ +--- +# ================================================================================ +# FIXED, DO NOT MODIFY THIS FILE +# ================================================================================ +weight: 21 # Set to always be larger than the content in this path to be at the end of the navigation. +title: "Next Steps" # Always the same, html page title. +layout: "learningpathall" # All files under learning paths have this same wrapper for Hugo processing. +--- diff --git a/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/background.md b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/background.md new file mode 100644 index 0000000000..3ca33a6fd8 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/background.md @@ -0,0 +1,23 @@ +--- +title: Getting started with ClickHouse on Google Axion C4A (Arm Neoverse-V2) + +weight: 2 + +layout: "learningpathall" +--- + +## Google Axion C4A Arm instances in Google Cloud + +Google Axion C4A is a family of Arm-based virtual machines built on Google’s custom Axion CPU, which is based on Arm Neoverse-V2 cores. Designed for high-performance and energy-efficient computing, these virtual machines offer strong performance for modern cloud workloads such as CI/CD pipelines, microservices, media processing, and general-purpose applications. + +The C4A series provides a cost-effective alternative to x86 virtual machines while leveraging the scalability and performance benefits of the Arm architecture in Google Cloud. 
+ +To learn more about Google Axion, refer to the [Introducing Google Axion Processors, our new Arm-based CPUs](https://cloud.google.com/blog/products/compute/introducing-googles-new-arm-based-cpu) blog. + +## ClickHouse + +ClickHouse is an open-source, columnar OLAP database designed for **high-performance analytics** and real-time reporting. It supports **vectorized execution, columnar storage, and distributed deployments** for fast queries on large datasets. It offers **scalable, fault-tolerant architecture** with support for replication and sharding. + +Ideal for analytics, monitoring, and event processing, ClickHouse runs efficiently on both x86 and Arm-based platforms, including AWS Graviton and GCP Arm VMs. + +Learn more at the [ClickHouse website](https://clickhouse.com/). diff --git a/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/baseline.md b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/baseline.md new file mode 100644 index 0000000000..65d714d91c --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/baseline.md @@ -0,0 +1,174 @@ +--- +title: ClickHouse Baseline Testing on Google Axion C4A Arm Virtual Machine +weight: 5 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## ClickHouse Baseline Testing on GCP SUSE VMs +This section validates that ClickHouse is functioning correctly and provides a **basic performance baseline** on a SUSE Linux Arm64 VM. + + +### Verify ClickHouse is running + +```console +sudo systemctl status clickhouse-server +``` + +This confirms that the ClickHouse server is running correctly under systemd and ready to accept connections. + +```output +● clickhouse-server.service - ClickHouse Server + Loaded: loaded (/etc/systemd/system/clickhouse-server.service; enabled; vendor preset: disabled) + Active: active (running) since Thu 2025-11-27 05:07:42 UTC; 18s ago + Main PID: 4229 (ClickHouseWatch) + Tasks: 814 + CPU: 2.629s + CGroup: /system.slice/clickhouse-server.service + ├─ 4229 clickhouse-watchdog server --config=/etc/clickhouse-server/config.xml + └─ 4237 /usr/bin/clickhouse server --config=/etc/clickhouse-server/config.xml +``` + +### Connect to ClickHouse +Client connection ensures that the ClickHouse CLI can successfully communicate with the running server. + +```console +clickhouse client +``` +### Create a test database and table +Database and table creation sets up a dedicated test environment and an analytics-optimized MergeTree table for baseline evaluation. + +```sql +CREATE DATABASE baseline_test; +USE baseline_test; +``` + +You should see an output similar to: +```output +CREATE DATABASE baseline_test +Query id: bc615167-ecd5-4470-adb0-918d8ce07caf +Ok. +0 rows in set. Elapsed: 0.012 sec. + + +USE baseline_test +Query id: cd49553a-c0ff-4656-a3e5-f0e9fccd9eba +Ok. +0 rows in set. Elapsed: 0.001 sec. +``` +Create a simple table optimized for analytics: + +```sql +CREATE TABLE events +( + event_time DateTime, + user_id UInt64, + event_type String +) +ENGINE = MergeTree +ORDER BY (event_time, user_id); +``` + +You should see an output similar to: +```output +Query id: 62ce9b9c-9a7b-45c8-9a58-fa6302b13a88 + +Ok. + +0 rows in set. Elapsed: 0.011 sec. +``` + +### Insert baseline test data +Data insertion loads a small, controlled dataset to simulate real event data and validate write functionality. 
+Insert sample data (10,000 rows): + +```sql +INSERT INTO events +SELECT + now() - number, + number, + 'click' +FROM numbers(10000); +``` + +You should see an output similar to: +```output +Query id: af860501-d903-4226-9e10-0e34467f7675 + +Ok. + +10000 rows in set. Elapsed: 0.003 sec. Processed 10.00 thousand rows, 80.00 KB (3.36 million rows/s., 26.86 MB/s.) +Peak memory usage: 3.96 MiB. +``` + +**Verify row count:** + +Row count validation verifies that the inserted data is stored correctly and consistently. + +```sql +SELECT count(*) FROM events; +``` + +You should see an output similar to: +```output +Query id: 644f6556-e69b-4f98-98ec-483ee6869d6e + + ┌─count()─┐ +1. │ 10000 │ + └─────────┘ + +1 row in set. Elapsed: 0.002 sec. +``` + +### Baseline read performance test +Baseline read queries measure basic query performance for filtering, aggregation, and grouping, establishing an initial performance reference on the Arm64 VM. + +- Run simple analytical queries: + +```sql +SELECT count(*) FROM events WHERE event_type = 'click'; +``` + +You should see an output similar to: +```output +Query id: bd609de4-c08e-4f9f-804a-ee0528c94e4d + + ┌─count()─┐ +1. │ 10000 │ + └─────────┘ + +1 row in set. Elapsed: 0.003 sec. Processed 10.00 thousand rows, 130.00 KB (2.98 million rows/s., 38.71 MB/s.) +Peak memory usage: 392.54 KiB. +``` + +- This query groups events by date and counts how many events occurred on each day, returning a daily summary of total events in chronological order. + +```sql +SELECT + toDate(event_time) AS date, + count(*) AS total_events +FROM events +GROUP BY date +ORDER BY date; +``` + +You should see an output similar to: +```output +Query id: b3db69f8-c885-419f-9900-53d258f0b996 + + ┌───────date─┬─total_events─┐ +1. │ 2025-11-27 │ 10000 │ + └────────────┴──────────────┘ + +1 row in set. Elapsed: 0.002 sec. Processed 10.00 thousand rows, 40.00 KB (4.08 million rows/s., 16.33 MB/s.) +Peak memory usage: 785.05 KiB. +``` + +Exit the client: + +```console +exit; +``` + +The baseline tests confirm that ClickHouse is stable, functional, and performing efficiently on the Arm64 VM. With core operations validated, the setup is now ready for detailed performance benchmarking. diff --git a/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/benchmarking.md b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/benchmarking.md new file mode 100644 index 0000000000..a57ed3ff12 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/benchmarking.md @@ -0,0 +1,265 @@ +--- +title: ClickHouse Benchmarking +weight: 6 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + + +## ClickHouse Benchmark on GCP SUSE Arm64 VM +ClickHouse provides an official benchmarking utility called **`clickhouse-benchmark`**, which is included **by default** in the ClickHouse installation. +This tool measures **query throughput and latency**. + +### Verify the benchmarking tool exists +Confirm that `clickhouse-benchmark` is installed and available on the system before running performance tests. + +```console +which clickhouse-benchmark +``` +You should see an output similar to: + +```output +/usr/bin/clickhouse-benchmark +``` + +### Prepare benchmark database and table +Create a test database and table structure where sample data will be stored for benchmarking. 
+ +```console +clickhouse client +``` + +```sql +CREATE DATABASE IF NOT EXISTS bench; +USE bench; + +CREATE TABLE IF NOT EXISTS hits +( + event_time DateTime, + user_id UInt64, + url String +) +ENGINE = MergeTree +ORDER BY (event_time, user_id); +``` +You should see an output similar to: +```output +Query id: 83485bc4-ad93-4dfc-bafe-c0e2a45c1b34 +Ok. +0 rows in set. Elapsed: 0.005 sec. +``` + +Exit client: + +```console +exit; +``` +### Load benchmark data +Insert 1 million sample records into the table to simulate a realistic workload for testing query performance. + +```sql +clickhouse-client --query " +INSERT INTO bench.hits +SELECT + now() - number, + number, + concat('/page/', toString(number % 100)) +FROM numbers(1000000)" +``` + +This inserts 1 million rows. + +**Verify:** + +Check that the data load was successful by counting the total number of rows in the table. + +```sql +clickhouse-client --query "SELECT count(*) FROM bench.hits" +``` + +You should see an output similar to: +```output +1000000 +``` + +### Read query benchmark +Measures how fast ClickHouse can scan and count rows using a simple filter, showing basic read performance and low latency. + +```sql +clickhouse-benchmark \ + --host localhost \ + --port 9000 \ + --iterations 10 \ + --concurrency 1 \ + --query "SELECT count(*) FROM bench.hits WHERE url LIKE '/page/%'" +``` + +You should see an output similar to: +```output +Loaded 1 queries. + +Queries executed: 10 (100%). + +localhost:9000, queries: 10, QPS: 63.167, RPS: 63167346.434, MiB/s: 957.833, result RPS: 63.167, result MiB/s: 0.000. + +0% 0.003 sec. +10% 0.003 sec. +20% 0.003 sec. +30% 0.004 sec. +40% 0.004 sec. +50% 0.004 sec. +60% 0.004 sec. +70% 0.004 sec. +80% 0.004 sec. +90% 0.004 sec. +95% 0.005 sec. +99% 0.005 sec. +99.9% 0.005 sec. +99.99% 0.005 sec. +``` + + +### Benchmark aggregation query +Test the performance of grouping and aggregation operations, demonstrating analytical query efficiency. + +```sql +clickhouse-benchmark \ + --host localhost \ + --port 9000 \ + --iterations 10 \ + --concurrency 2 \ + --query " + SELECT + url, + count(*) AS total + FROM bench.hits + GROUP BY url + " +``` + +You should see an output similar to: +```output +Queries executed: 10 (100%). + +localhost:9000, queries: 10, QPS: 67.152, RPS: 67151788.647, MiB/s: 1018.251, result RPS: 6715.179, result MiB/s: 0.153. + +0% 0.005 sec. +10% 0.005 sec. +20% 0.005 sec. +30% 0.007 sec. +40% 0.007 sec. +50% 0.007 sec. +60% 0.007 sec. +70% 0.007 sec. +80% 0.007 sec. +90% 0.007 sec. +95% 0.008 sec. +99% 0.008 sec. +99.9% 0.008 sec. +99.99% 0.008 sec. +``` + +### Benchmark concurrent read workload +Run multiple queries at the same time to evaluate how well ClickHouse handles higher user load and parallel processing. + +```sql +clickhouse-benchmark \ + --host localhost \ + --port 9000 \ + --iterations 20 \ + --concurrency 8 \ + --query " + SELECT count(*) + FROM bench.hits + WHERE user_id % 10 = 0 + " +``` + +You should see an output similar to: +```output +Loaded 1 queries. + +Queries executed: 20 (100%). + +localhost:9000, queries: 20, QPS: 99.723, RPS: 99723096.882, MiB/s: 760.827, result RPS: 99.723, result MiB/s: 0.001. + +0% 0.012 sec. +10% 0.012 sec. +20% 0.013 sec. +30% 0.017 sec. +40% 0.020 sec. +50% 0.029 sec. +60% 0.029 sec. +70% 0.038 sec. +80% 0.051 sec. +90% 0.062 sec. +95% 0.063 sec. +99% 0.078 sec. +99.9% 0.078 sec. +99.99% 0.078 sec. +``` + +### Measuring insert performance +Measures bulk data ingestion speed and write latency under concurrent insert operations. 
+ +```sql +clickhouse-benchmark \ + --iterations 5 \ + --concurrency 4 \ + --query " + INSERT INTO bench.hits + SELECT + now(), + rand64(), + '/benchmark' + FROM numbers(500000) + " +``` + +You should see an output similar to: +```output +Queries executed: 5 (100%). + +localhost:9000, queries: 5, QPS: 20.935, RPS: 10467305.309, MiB/s: 79.859, result RPS: 0.000, result MiB/s: 0.000. + +0% 0.060 sec. +10% 0.060 sec. +20% 0.060 sec. +30% 0.060 sec. +40% 0.068 sec. +50% 0.068 sec. +60% 0.068 sec. +70% 0.069 sec. +80% 0.069 sec. +90% 0.073 sec. +95% 0.073 sec. +99% 0.073 sec. +99.9% 0.073 sec. +99.99% 0.073 sec. +``` +### Benchmark Metrics Explanation + +- **QPS (Queries Per Second):** Indicates how many complete queries ClickHouse can execute per second. Higher QPS reflects stronger overall query execution capacity. +- **RPS (Rows Per Second):** Shows the number of rows processed every second. Very high RPS values demonstrate ClickHouse’s efficiency in scanning large datasets. +- **MiB/s (Throughput):** Represents data processed per second in mebibytes. High throughput highlights effective CPU, memory, and disk utilization during analytics workloads. +- **Latency Percentiles (p50, p95, p99):** Measure query response times. p50 is the median latency, while p95 and p99 show tail latency under heavier load—critical for understanding performance consistency. +- **Iterations:** Number of times the same query is executed. More iterations improve measurement accuracy and stability. +- **Concurrency:** Number of parallel query clients. Higher concurrency tests ClickHouse’s ability to scale under concurrent workloads. +- **Result RPS / Result MiB/s:** Reflects the size and rate of returned query results. Low values are expected for aggregate queries like `COUNT(*)`. +- **Insert Benchmark Metrics:** Write tests measure ingestion speed and stability, where consistent latency indicates reliable bulk insert performance. + +### Benchmark summary +Results from the earlier run on the `c4a-standard-4` (4 vCPU, 16 GB memory) Arm64 VM in GCP (SUSE): + +| Test Category | Test Case | Query / Operation | Iterations | Concurrency | QPS | Rows / sec (RPS) | Throughput (MiB/s) | p50 Latency | p95 Latency | p99 Latency | +| ----------------------- | -------------- | -------------------------------------- | ---------: | ----------: | ----: | ---------------: | -----------------: | ----------: | ----------: | ----------: | +| Read | Filtered COUNT | `COUNT(*) WHERE url LIKE '/page/%'` | 10 | 1 | 63.17 | 63.17 M | 957.83 | 4 ms | 5 ms | 5 ms | +| Read / Aggregate | GROUP BY | `GROUP BY url` | 10 | 2 | 67.15 | 67.15 M | 1018.25 | 7 ms | 8 ms | 8 ms | +| Read (High Concurrency) | Filtered COUNT | `COUNT(*) WHERE user_id % 10 = 0` | 20 | 8 | 99.72 | 99.72 M | 760.83 | 29 ms | 63 ms | 78 ms | +| Write | Bulk Insert | `INSERT SELECT … FROM numbers(500000)` | 5 | 4 | 20.94 | 10.47 M | 79.86 | 68 ms | 73 ms | 73 ms | + +- **High Read Throughput:** Simple filtered reads and aggregations achieved over **63–67 million rows/sec**, demonstrating strong scan and aggregation performance on Arm64. +- **Scales Under Concurrency:** At higher concurrency (8 clients), the system sustained nearly **100 million rows/sec**, showing efficient parallel execution and CPU utilization. +- **Fast Aggregations:** `GROUP BY` workloads delivered over **1 GiB/s throughput** with low single-digit millisecond latency at moderate concurrency. 
+- **Stable Write Performance:** Bulk inserts maintained consistent throughput with predictable latency, indicating reliable ingestion performance on C4A Arm cores. diff --git a/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/images/gcp-shell.png b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/images/gcp-shell.png new file mode 100644 index 0000000000..7e2fc3d1b5 Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/images/gcp-shell.png differ diff --git a/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/images/gcp-ssh.png b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/images/gcp-ssh.png new file mode 100644 index 0000000000..597ccd7fea Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/images/gcp-ssh.png differ diff --git a/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/images/gcp-vm.png b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/images/gcp-vm.png new file mode 100644 index 0000000000..0d1072e20d Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/images/gcp-vm.png differ diff --git a/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/installation.md b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/installation.md new file mode 100644 index 0000000000..c036e9213b --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/installation.md @@ -0,0 +1,195 @@ +--- +title: Install ClickHouse +weight: 4 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Install ClickHouse on GCP VM +This guide covers installing, configuring, and validating ClickHouse on a GCP SUSE Linux Arm64 VM. It includes system preparation, installing ClickHouse with the official installer, verifying the setup, starting the server, and connecting via the client. The guide also configures ClickHouse as a systemd service to ensure reliable, automatic startup on Arm-based environments. + +### Install required system packages and the ClickHouse repo +Refresh system repositories and install basic utilities needed to download and run ClickHouse. + +```console +sudo zypper refresh +sudo zypper addrepo -r https://packages.clickhouse.com/rpm/clickhouse.repo -g +sudo zypper --gpg-auto-import-keys refresh clickhouse-stable +``` + +### Install ClickHouse via the ClickHouse repo +Download and install ClickHouse for SuSE systems: + +```console +sudo zypper install -y clickhouse-server clickhouse-client +``` + +This installs: + +- **ClickHouse Server** – Runs the core database engine and handles all data storage, queries, and processing. +- **ClickHouse Client** – Provides a command-line interface to connect to the server and run SQL queries. +- **ClickHouse Local** – Allows running SQL queries on local files without starting a server. +- **Default configuration files (/etc/clickhouse-server)** – Stores server settings such as ports, users, storage paths, and performance tuning options. + +### Verify the installed version +Confirm that all ClickHouse components are installed correctly by checking their versions. + +```console +clickhouse --version +clickhouse server --version +clickhouse client --version +clickhouse local --version +``` + +You should see an output similar to: +```output +ClickHouse local version 25.11.2.24 (official build). +ClickHouse server version 25.11.2.24 (official build). 
+ClickHouse client version 25.11.2.24 (official build). +``` + +### Create ClickHouse user and directories +Create a dedicated system user and required directories for data, logs, and runtime files. + +```console +sudo useradd -r -s /sbin/nologin clickhouse || true +sudo mkdir -p /var/lib/clickhouse +sudo mkdir -p /var/log/clickhouse-server +sudo mkdir -p /var/run/clickhouse-client +``` +Set proper ownership so ClickHouse can access these directories. + +```console +sudo chown -R clickhouse:clickhouse \ + /var/lib/clickhouse \ + /var/log/clickhouse-server \ + /var/run/clickhouse-client +sudo chmod 755 /var/lib/clickhouse \ + /var/log/clickhouse-server \ + /var/run/clickhouse-client +``` + +### Start ClickHouse Server manually +You can just run the ClickHouse server in the foreground to confirm the configuration is valid. + +```console +sudo -u clickhouse clickhouse server --config-file=/etc/clickhouse-server/config.xml +``` +Keep this terminal open while testing. + +### Connect using ClickHouse Client +Open a new SSH terminal and connect to the ClickHouse server. + +```console +clickhouse client +``` +Run a test query to confirm connectivity. + +```sql +SELECT version(); +``` +You should see an output similar to: +```output +SELECT version() + +Query id: ddd3ff38-c0c6-43c5-8ae1-d9d07af4c372 + + ┌─version()───┐ +1. │ 25.11.2.24 │ + └─────────────┘ + +1 row in set. Elapsed: 0.001 sec. +``` + +Please close the client SSH terminal and press "ctrl-c" in the server SSH terminal to halt the manual invocation of ClickHouse. FYI, the server may take a few seconds to close down when "ctrl-c" is received. + +{{% notice Note %}} +Recent benchmarks show that ClickHouse (v22.5.1.2079-stable) delivers up to 26% performance improvements on Arm-based platforms, such as AWS Graviton3, compared to other architectures, highlighting the efficiency of its vectorized execution engine on modern Arm CPUs. +You can view [this Blog](https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/improve-clickhouse-performance-up-to-26-by-using-aws-graviton3) + +The [Arm Ecosystem Dashboard](https://developer.arm.com/ecosystem-dashboard/) recommends ClickHouse version v22.5.1.2079-stable, the minimum recommended on the Arm platforms. +{{% /notice %}} + +### Create a systemd service +Set up ClickHouse as a system service so it starts automatically on boot. + +```console +sudo tee /etc/systemd/system/clickhouse-server.service <<'EOF' +[Unit] +Description=ClickHouse Server +After=network.target + +[Service] +Type=simple +User=clickhouse +Group=clickhouse +ExecStart=/usr/bin/clickhouse server --config=/etc/clickhouse-server/config.xml +Restart=always +RestartSec=10 +LimitNOFILE=1048576 + +[Install] +WantedBy=multi-user.target +EOF +``` +**Reload systemd and enable the service:** + +```console +sudo systemctl enable clickhouse-server +sudo systemctl start clickhouse-server +sudo systemctl daemon-reload +``` + +{{% notice Note %}} +You may get the following error which can be safely ignored: + +"ln: failed to create symbolic link '/etc/init.d/rc2.d/S50clickhouse-server': No such file or directory" +{{% /notice %}} + +### Verify ClickHouse service +Ensure the ClickHouse server is running correctly as a background service. + +```console +sudo systemctl status clickhouse-server +``` + +This confirms that the ClickHouse server is running correctly under systemd and ready to accept connections. 
+
+```output
+● clickhouse-server.service - ClickHouse Server
+     Loaded: loaded (/etc/systemd/system/clickhouse-server.service; enabled; vendor preset: disabled)
+     Active: active (running) since Thu 2025-11-27 05:07:42 UTC; 18s ago
+   Main PID: 4229 (ClickHouseWatch)
+      Tasks: 814
+        CPU: 2.629s
+     CGroup: /system.slice/clickhouse-server.service
+             ├─ 4229 clickhouse-watchdog server --config=/etc/clickhouse-server/config.xml
+             └─ 4237 /usr/bin/clickhouse server --config=/etc/clickhouse-server/config.xml
+```
+
+### Final validation
+Reconnect to ClickHouse and confirm it is operational.
+
+```console
+clickhouse client
+```
+
+```sql
+SELECT version();
+```
+
+You should see an output similar to:
+```output
+SELECT version()
+
+Query id: ddd3ff38-c0c6-43c5-8ae1-d9d07af4c372
+
+   ┌─version()───┐
+1. │ 25.12.1.168 │
+   └─────────────┘
+
+1 row in set. Elapsed: 0.001 sec.
+```
+
+ClickHouse is now successfully installed, configured, and running on SUSE Linux Arm64 with automatic startup enabled.
diff --git a/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/instance.md b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/instance.md
new file mode 100644
index 0000000000..ddc0fd1bb4
--- /dev/null
+++ b/content/learning-paths/servers-and-cloud-computing/clickhouse-gcp/instance.md
@@ -0,0 +1,43 @@
+---
+title: Create a Google Axion C4A Arm virtual machine on GCP
+weight: 3
+
+### FIXED, DO NOT MODIFY
+layout: learningpathall
+---
+
+## Overview
+
+In this section, you will learn how to provision a Google Axion C4A Arm virtual machine on Google Cloud Platform (GCP) using the `c4a-standard-4` (4 vCPUs, 16 GB memory) machine type in the Google Cloud Console.
+
+{{% notice Note %}}
+For support on GCP setup, see the Learning Path [Getting started with Google Cloud Platform](https://learn.arm.com/learning-paths/servers-and-cloud-computing/csp/google/).
+{{% /notice %}}
+
+## Provision a Google Axion C4A Arm VM in Google Cloud Console
+
+To create a virtual machine based on the C4A instance type:
+- Navigate to the [Google Cloud Console](https://console.cloud.google.com/).
+- Go to **Compute Engine > VM Instances** and select **Create Instance**.
+- Under **Machine configuration**:
+   - Populate fields such as **Instance name**, **Region**, and **Zone**.
+   - Set **Series** to `C4A`.
+   - Select `c4a-standard-4` for machine type.
+
+   ![Create a Google Axion C4A Arm virtual machine in the Google Cloud Console with c4a-standard-4 selected alt-text#center](images/gcp-vm.png "Creating a Google Axion C4A Arm virtual machine in Google Cloud Console")
+
+
+- Under **OS and Storage**, select **Change**, then choose an Arm64-based OS image. For this Learning Path, use **SUSE Linux Enterprise Server**.
+- If you are using **SUSE Linux Enterprise Server**, select "Pay As You Go" for the license type.
+- Once you have configured these options, click **Select**.
+- Under **Networking**, enable **Allow HTTP traffic**.
+- Click **Create** to launch the instance.
+- Once created, you should see an "SSH" option to the right in your list of VM instances. Click it to launch an SSH shell into your VM instance:
+
+![Invoke an SSH session via your browser alt-text#center](images/gcp-ssh.png "Invoke an SSH session into your running VM instance")
+
+- A browser window opens with a shell into your VM instance:
+
+![Terminal Shell in your VM instance alt-text#center](images/gcp-shell.png "Terminal shell in your VM instance")
+
+Next, let's install ClickHouse!
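+
+If you prefer to script instance creation rather than click through the console, you can create an equivalent VM with the gcloud CLI. The following is a sketch only: the instance name, zone, and the SLES Arm64 image family and project are assumptions, so adjust them for your project (you can list available images with `gcloud compute images list --filter="family~sles"`).
+
+```console
+# Sketch only: instance name, zone, and image family/project are assumptions
+gcloud compute instances create clickhouse-c4a \
+  --zone=us-central1-a \
+  --machine-type=c4a-standard-4 \
+  --image-family=sles-15-arm64 \
+  --image-project=suse-cloud
+```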
\ No newline at end of file
diff --git a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/_index.md b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/_index.md
index f70f62ef38..d7d0f7c104 100644
--- a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/_index.md
+++ b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/_index.md
@@ -1,18 +1,15 @@
 ---
-title: Deploy Couchbase on Google Cloud C4A (Arm-based Axion VMs)
+title: Deploy Couchbase on Google Cloud C4A
 
-draft: true
-cascade:
-  draft: true
 
 minutes_to_complete: 30
 
-who_is_this_for: This learning path is intended for software developers deploying and optimizing Couchbase workloads on Arm Linux environments, specifically using Google Cloud C4A virtual machines powered by Axion processors.
+who_is_this_for: This is an introductory topic for developers deploying Couchbase workloads on Arm Linux environments, specifically using Google Cloud C4A virtual machines (VMs) powered by Axion processors.
 
 learning_objectives:
-  - Provision an Arm-based SUSE SLES virtual machine on Google Cloud (C4A with Axion processors)
+  - Provision an Arm-based SUSE Linux Enterprise Server (SLES) virtual machine on Google Cloud (C4A with Axion processors)
   - Install Couchbase Server on the SUSE Arm64 (C4A) instance
-  - Verify Couchbase deployment by accessing the Web Console, creating a test bucket, and confirming cluster health
+  - Verify Couchbase deployment by accessing the web console, creating a test bucket, and confirming cluster health
   - Benchmark Couchbase by measuring operations per second (ops/sec), memory utilization, and disk performance on the Arm platform
 
 prerequisites:
diff --git a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/background.md b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/background.md
index 2fb616d642..25e5ba5dc1 100644
--- a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/background.md
+++ b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/background.md
@@ -1,5 +1,5 @@
 ---
-title: Getting started with Couchbase on Google Axion C4A (Arm Neoverse-V2)
+title: Get started with Couchbase on Google Axion C4A
 
 weight: 2
 
@@ -16,8 +16,7 @@ To learn more about Google Axion, refer to the [Introducing Google Axion Process
 
 ## Couchbase
 
-Couchbase is an open-source NoSQL distributed database designed for building high-performance, scalable, and flexible modern applications. Developed by Couchbase, Inc.
-It combines the capabilities of a key-value store, document database, and distributed caching system in a single unified platform.
+Couchbase is an open-source NoSQL distributed database designed for building high-performance, scalable, and flexible modern applications. Developed by Couchbase, Inc., it combines the capabilities of a key-value store, document database, and distributed caching system in a single unified platform.
 
 Couchbase provides a memory-first architecture for low-latency data access, along with a powerful query engine (N1QL) that supports SQL-like syntax for JSON data. It also features built-in replication, automatic sharding, and cross-datacenter synchronization (XDCR), enabling seamless scalability and high availability across clusters and regions.
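+
+As an illustration of that SQL-like syntax, an N1QL query against Couchbase's optional `travel-sample` sample bucket (if you choose to load it) looks like the following; the bucket and fields are shown only as an example:
+
+```sql
+SELECT airline, COUNT(*) AS routes
+FROM `travel-sample`
+WHERE type = "route"
+GROUP BY airline
+ORDER BY routes DESC
+LIMIT 5;
+```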
@@ -25,4 +24,4 @@ It supports both on-premises and cloud deployments (including AWS, Azure, and GC Known for its high throughput, low latency, and ease of scaling, Couchbase is ideal for use cases such as real-time analytics, session management, content delivery, IoT, and mobile synchronization through Couchbase Mobile. -To learn more, visit the official [Couchbase website](https://www.couchbase.com/) +To learn more, visit the [Couchbase website](https://www.couchbase.com/). diff --git a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/baseline.md b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/baseline.md index 48f1a7c0b5..c64a751868 100644 --- a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/baseline.md +++ b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/baseline.md @@ -1,43 +1,54 @@ --- -title: Couchbase Baseline Testing on Google Axion C4A Arm Virtual Machine +title: Perform Couchbase baseline testing weight: 6 ### FIXED, DO NOT MODIFY layout: learningpathall --- -## Couchbase Baseline Testing on GCP SUSE VMs -This section confirms that Couchbase is correctly installed and running on the GCP SUSE Arm64 VM. It includes initializing the cluster, verifying node status, and accessing the Web UI to create a bucket — this ensures the setup is ready for benchmarking. +## Verify Couchbase cluster health and prepare a benchmarking bucket +This section guides you through verifying that Couchbase is installed and running on your GCP SUSE VM with Arm64 architecture. You'll set up your cluster, confirm your node is healthy, and use the web console to create a bucket. After completing these steps, your environment is ready for benchmarking. -### Setup the default cluster -Once the service is running, we need to setup the default cluster for the first time. +## Set up the default cluster +Once Couchbase is running, set up your default cluster for the first time. -- Open Web Console using your VM public IP address that you saved off in the last step: +Open the **Couchbase Web Console** in your browser using your VM's public IP address from the previous step: ```console http://:8091 ``` -- Press "Setup New Cluster" -![Setup new cluster](images/cluster-setup-1.png "Setup new cluster") +Select **Set up new cluster** to begin the initial configuration. -- Provide a name for your cluster (example: "my_cluster") and create a password for your administrator account (leave the username as the default "Administrator") +![Couchbase Web Console setup screen showing fields for cluster name, administrator password, and a button labeled Set up new cluster. The interface is clean and organized with a sidebar on the left and main setup form in the center. The environment appears welcoming and straightforward, designed to guide users through initial cluster configuration. Visible text includes Set up new cluster, Cluster Name, Administrator Password, and Set up new cluster button. alt-text#center](images/cluster-setup-1.png "Set up new cluster") -![Create cluster and admin count](images/cluster-setup-2.png "Create cluster and admin count") +Provide a name for your cluster (for example, "my_cluster") and create a password for your administrator account (leaving the username as the default `Administrator`). -- Check the "Accept terms" checkbox and press "Configure Disk, Memory, Services" button +![Couchbase Web Console showing the cluster creation screen with fields for cluster name and administrator password. 
The main form is centered with a sidebar on the left. Visible text includes Set up new cluster, Cluster Name, Administrator Password, and Set up new cluster button. The interface is clean and welcoming, designed to guide users through initial cluster setup. alt-text#center](images/cluster-setup-2.png "Create cluster and admin count") +Check the **Accept Terms** box to agree to the license terms. Then select **Configure Disk Memory Services** to continue with the cluster setup. -![Accept Terms](images/cluster-setup-3.png "Accept Terms") +![Couchbase Web Console showing the Accept Terms screen for cluster setup. The main form displays a checkbox labeled Accept Terms and a button labeled Configure Disk Memory Services. The interface is clean and organized with a sidebar on the left and the setup form centered. Visible text includes Accept Terms and Configure Disk Memory Services. The environment feels welcoming and guides users through the initial configuration process. alt-text#center](images/cluster-setup-3.png "Accept Terms") -- Accept the defaults of your cluster configuration and press "Save & Finish" +Accept the defaults of your cluster configuration and select "Save & Finish". -![Finalize configuration](images/cluster-setup-4.png "Finalize configuration") +![Couchbase Web Console showing the final configuration screen for cluster setup. The main form displays fields and buttons for finalizing cluster settings, including disk, memory, and services configuration. The sidebar is visible on the left, and the interface is organized and welcoming, designed to guide users through the last step of cluster setup. Visible text includes Finalize configuration, Configure Disk Memory Services, and Save Finish. The environment feels supportive and clear, helping users complete the initial Couchbase cluster configuration. alt-text#center](images/cluster-setup-4.png "Finalize configuration") +Your default cluster is now set up. Save the password you created for your Administrator account. You'll need this password for future steps, including verifying cluster health and creating your benchmarking bucket. -Our default cluster is now created! Please retain the password you created for your "Administrator" account... you'll need that in the next steps. +## Verify cluster nodes +Run the following command to verify that your Couchbase node is healthy. Replace `password` with the Administrator password you set earlier. -### Verify Cluster Nodes -This command checks if your Couchbase server (called a “node”) is running properly. Replace "password" with your specified Couchbase Administrator password. -If the output says **“healthy active”**, it means your Couchbase node is working fine and ready for operations. +```console +/opt/couchbase/bin/couchbase-cli server-list \ + -u Administrator -p password --cluster localhost +``` + +The expected output is: + +```output +ns_1@cb.local 127.0.0.1:8091 healthy active +``` + +If you see `healthy active`, your Couchbase node is running correctly and ready for benchmarking. ```console /opt/couchbase/bin/couchbase-cli server-list \ @@ -48,28 +59,28 @@ If the output says **“healthy active”**, it means your Couchbase node is wor ns_1@cb.local 127.0.0.1:8091 healthy active ``` -### Prepare a Couchbase Bucket for benchmarking -Once the service is running, you can access the **Couchbase Web Console** to create a bucket for benchmarking. 
+## Prepare a Couchbase bucket for benchmarking +Once the service is running, you can access the Couchbase Web Console to create a bucket for benchmarking. -Open Web Console using the public IP address of your VM that you saved off from the last step: +Open the Couchbase Web Console in your browser. Enter your VM's public IP address from the previous step, followed by `:8091`. For example: ```console http://:8091 ``` + Use the admin `username` (default is "Administrator") and `password` you created during Couchbase cluster setup in the previous step. -![Couchbase Dashboard](images/dashboard-1.png "Couchbase Dashboard") +![Couchbase Dashboard showing cluster health, server statistics, and bucket status. The dashboard displays panels for cluster overview, server nodes, and buckets, with status indicators for health and activity. Text in the image includes Cluster Overview, Servers, Buckets, and Health. The interface is organized and visually clear, designed to help users monitor Couchbase performance and status. The environment feels professional and supportive, encouraging users to review system health and resource usage. alt-text#center](images/dashboard-1.png "Couchbase Dashboard") + +On the left sidebar, select **Buckets** to view your bucket list. -- On the left hand side select "Buckets" -- Press the "Add Bucket" in the upper right hand corner: +In the upper right corner, select **Add Bucket** to start creating a new bucket for benchmarking. -![Create Bucket](images/create-bucket-1.png "Create Bucket") +![Couchbase Web Console showing the Add Bucket screen. The main form displays fields for bucket name, bucket type, and memory quota. The sidebar is visible on the left, and the Add Bucket button is highlighted in the upper right corner. Transcribed text includes Bucket Name, Bucket Type, Memory Quota, and Add Bucket. The interface is organized and welcoming, designed to guide users through bucket creation. The environment feels supportive and clear, encouraging users to set up a new Couchbase bucket for benchmarking. alt-text#center](images/create-bucket-1.png "Create Bucket") -- Name the new bucket "benchmark" -- The bucket type will be "Couchbase" -- The Memory Quota can be set to "512 MB" +Now name your bucket `benchmark`. For **Bucket Type**, select **Couchbase**. Set the **Memory Quota** to **512 MB**. These settings help ensure your benchmarking results are accurate and consistent on your Arm-based GCP VM. -![Create Bucket](images/create-bucket-2.png "Create Bucket") +![Couchbase Web Console displaying the Add Bucket form with fields for Bucket Name, Bucket Type, and Memory Quota. The user is entering benchmark as the bucket name, selecting Couchbase as the bucket type, and setting the memory quota to 512 MB. The Add Bucket button is visible in the upper right corner. The sidebar on the left shows navigation options. Transcribed text includes Bucket Name, Bucket Type, Memory Quota, and Add Bucket. The environment is organized and supportive, guiding users through the process of creating a new bucket for benchmarking. 
alt-text#center](images/create-bucket-2.png "Create Bucket") | **Parameter** | **Value** | |----------------|-----------| @@ -77,14 +88,12 @@ Use the admin `username` (default is "Administrator") and `password` you created | **Bucket Type** | Couchbase | | **Memory Quota** | 512 MB | -- You should now see that your bucket has been created: +You should now see that your bucket has been created: -![Created Bucket](images/create-bucket-3.png "Created Bucket") +![Couchbase Web Console showing the newly created benchmark bucket listed under Buckets. The main panel displays a table with columns for Bucket Name, Bucket Type, and Memory Quota. The benchmark bucket appears with type Couchbase and memory quota 512 MB. The sidebar on the left provides navigation options. Visible text includes Bucket Name, Bucket Type, Memory Quota, and benchmark. The environment is organized and supportive, confirming successful bucket creation and encouraging users to continue with benchmarking tasks. alt-text#center](images/create-bucket-3.png "Created Bucket") -#### Additional notes about buckets in Couchbase +## Understanding buckets in Couchbase -- A **bucket** in Couchbase is like a **database** — it stores and manages your data. -- The **benchmark** bucket will be used for **load testing** and **performance benchmarking**. -- Setting the **RAM Quota** ensures Couchbase allocates sufficient memory for **in-memory data operations**, improving overall speed. +A bucket in Couchbase works like a database. It stores and organizes your data. In this guide, you created a benchmark bucket specifically for load testing and performance benchmarking. Setting the Memory Quota (RAM Quota) ensures Couchbase reserves enough memory for fast, in-memory data operations. This helps you get accurate performance results on your Arm-based GCP VM. You can now proceed to the next section for benchmarking to measure Couchbase's performance. diff --git a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/benchmarking.md b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/benchmarking.md index 4ecff50f41..c223dc0c46 100644 --- a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/benchmarking.md +++ b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/benchmarking.md @@ -1,5 +1,5 @@ --- -title: Couchbase Benchmarking +title: Benchmark Couchbase weight: 7 ### FIXED, DO NOT MODIFY @@ -7,21 +7,21 @@ layout: learningpathall --- -## Couchbase Benchmark on GCP SUSE Arm64 VM -This section guides you through benchmarking Couchbase performance on a GCP SUSE Arm64 VM using the **official `cbc-pillowfight` tool** from Couchbase C SDK. -It involves installing dependencies, building the SDK, verifying the setup, and running the benchmark test. +## Overview +This section guides you through benchmarking Couchbase performance on a GCP SUSE Arm64 VM using the official `cbc-pillowfight` tool from Couchbase C SDK. It involves installing dependencies, building the SDK, verifying the setup, and running the benchmark test. -### Install Build Tools & Dependencies -Before compiling the Couchbase SDK, install all required development tools and libraries. 
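+Before you begin, you can optionally confirm that the `benchmark` bucket created in the previous section still exists. This is a quick sketch using the same Couchbase CLI as earlier; replace `password` with your Administrator password:
+
+```console
+/opt/couchbase/bin/couchbase-cli bucket-list \
+  -u Administrator -p password --cluster localhost
+```
+
+The output should include a bucket named `benchmark`.
+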
+## Install build tools and dependencies +Before compiling the Couchbase SDK, install all the required development tools and libraries: ```console sudo zypper install -y gcc gcc-c++ cmake make git openssl-devel libevent-devel cyrus-sasl-devel java ``` -### Download and Build the Couchbase C SDK (includes cbc-pillowfight) -`cbc-pillowfight` is a Couchbase command-line benchmarking tool that simulates a workload by performing concurrent read and write operations on a bucket to test Couchbase cluster performance. +## Download and build the Couchbase C SDK (includes cbc-pillowfight) -Clone the official Couchbase C SDK repository from GitHub. This SDK includes benchmarking tools such as `cbc` and `cbc-pillowfight`. +To get the benchmarking tools, download the official Couchbase C SDK source code. This SDK provides both the `cbc` command-line client and the `cbc-pillowfight` benchmarking utility. + +First, move to your home directory and clone the repository: ```console cd ~ @@ -29,7 +29,7 @@ git clone https://github.com/couchbase/libcouchbase.git cd libcouchbase ``` -**Then build and install:** +Next, build and install the SDK: ```console mkdir build && cd build @@ -38,8 +38,40 @@ make -j$(nproc) sudo make install ``` -### Update the Dynamic Linker Configuration -After installation, tell the system where to find the Couchbase libraries. +This process compiles the SDK and installs the binaries to `/usr/local/bin`. You can now use `cbc` and `cbc-pillowfight` for benchmarking Couchbase performance on your Arm64 VM. `cbc-pillowfight` is a Couchbase command-line benchmarking tool that simulates a workload by performing concurrent read and write operations on a bucket to test Couchbase cluster performance. + +Now clone the official Couchbase C SDK repository from GitHub. This SDK includes benchmarking tools such as `cbc` and `cbc-pillowfight`: + +```console +cd ~ +git clone https://github.com/couchbase/libcouchbase.git +cd libcouchbase +``` + +To build and install, use the following: + +```console +mkdir build && cd build +cmake .. -DCMAKE_BUILD_TYPE=Release +make -j$(nproc) +sudo make install +``` + +## Update the dynamic linker configuration + +After installing the Couchbase C SDK, you need to update the dynamic linker configuration so your system can locate the Couchbase libraries. Add the library path to the linker configuration file: + +```console +echo "/usr/local/lib" | sudo tee /etc/ld.so.conf.d/libcouchbase.conf +``` + +Next, refresh the linker cache to apply the changes: + +```console +sudo ldconfig +``` + +This step ensures that applications can find and use the Couchbase libraries installed on your Arm64 VM. After installation, tell the system where to find the Couchbase libraries: ```console echo "/usr/local/lib" | sudo tee /etc/ld.so.conf.d/libcouchbase.conf @@ -51,16 +83,16 @@ Then refresh the linker cache to make the libraries available system-wide: sudo ldconfig ``` -### Verify Installation -After installation, the tools like **cbc**, **cbc-pillowfight**, etc. should be available in `/usr/local/bin`. +## Verify installation +After installation, the tools such as `cbc` and `cbc-pillowfight` should be available in `/usr/local/bin`. 
-**Verify with:** +Verify this with: ```console cbc version cbc-pillowfight --help ``` -For the "cbc version" command, you should see an output similar to: +For the `cbc version` command, you should see an output similar to: ```output cbc: Runtime: Version=3.3.18, Changeset=a8e17873d167ec75338a358e54cec3994612d260 @@ -78,11 +110,26 @@ cbc: CC: GNU 7.5.0; -fno-strict-aliasing -ggdb3 -pthread CXX: GNU 7.5.0; -fno-strict-aliasing -ggdb3 -pthread ``` +For the `cbc-pillowfight --help` command, you should see the help menu for `cbc-pillowfight` displayed. The output is similar to: -For the "cbc-pillowfight --help" command, you should see the "help" menu displayed for cbc-pillowfight. +```output +cbc-pillowfight - Simulate workload for Couchbase buckets + +Usage: cbc-pillowfight [options] +Options: +-U Connection string to Couchbase bucket +-u Couchbase admin username +-P Couchbase admin password +-I Number of items (documents) +-B Batch size for operations +-t Number of concurrent threads +-c Number of operation cycles +--help Show this help message and exit +``` + +## Run Benchmark using cbc-pillowfight +Once Couchbase Server is running and you've created a bucket (for example, `benchmark`), you're ready to test performance. Run the following command, replacing `password` with your Couchbase Administrator password: -### Run Benchmark using cbc-pillowfight -Once Couchbase Server is running and a bucket (e.g., `benchmark`) is created, you can run a workload test using the following command (use your Couchbase administrators password): ```console cbc-pillowfight -U couchbase://127.0.0.1/benchmark \ @@ -107,33 +154,38 @@ Thread 3 has finished populating. Thread 4 has finished populating. ``` -### Monitoring During Test -While the benchmark runs, open the Couchbase Web Console in your browser: +## Monitor Couchbase performance in real time -```bash +While the benchmark is running, open the Couchbase web console in your browser by entering the following address, replacing `` with your VM's IP address: + +```console http://:8091 ``` -**Navigate to**: -**Dashboard → Buckets → benchmark** +Select **Dashboard**, then **Buckets**, and choose the `benchmark` bucket. Here, you can observe live performance metrics, including: + +- Ops/sec: Number of operations per second, which should closely match the CLI output from `cbc-pillowfight`. +- Resident ratio: Percentage of data served from memory, indicating memory efficiency. +- Disk write queue: Number of pending write operations, useful for spotting disk bottlenecks. +- CPU and memory usage: Shows how effectively Arm cores are handling the workload. + +Monitoring these metrics helps you validate Couchbase performance and resource utilization on your Arm64 VM. + +![Couchbase Dashboard showing benchmark metrics centered on the benchmark bucket. The dashboard displays real-time statistics including operations per second, resident ratio, RAM usage, and disk usage. The environment is a web-based interface with navigation menus on the left and performance graphs in the main panel. Visible text includes Ops/sec 227981.1, Resident Ratio 100 percent, RAM Used 36.8 MiB of 1 GiB, and Disk Used 26.7 MiB. The overall tone is analytical and focused on performance monitoring. 
alt-text#center](images/arm-benchmark.png "Monitor Benchmark Log") + +## Benchmark results on Arm64 VM -Monitor real-time performance metrics such as: -- **Ops/sec** — should match your CLI output -- **Resident ratio** — how much data stays in memory -- **Disk write queue** — backlog of writes to disk -- **CPU and memory usage** — tells you how well ARM cores are handling load +The following table summarizes the benchmark results from running `cbc-pillowfight` on a `c4a-standard-4` (4 vCPU, 16 GB memory) Arm64 VM in Google Cloud Platform (GCP) with SUSE Linux: -![Couchbase Dashboard alt-text#center](images/arm-benchmark.png "Monitor Benchmark Log") +| Name | Items | Resident | Ops/sec | RAM Used / Quota | Disk Used | +|------------|-----------|--------------|-------------|----------------------|---------------| +| benchmark | 10,000 | 100% | 227,981.1 | 36.8 MiB / 1 GiB | 26.7 MiB | -### Benchmark summary -Results from the earlier run on the `c4a-standard-4` (4 vCPU, 16 GB memory) Arm64 VM in GCP (SUSE): +The key takeaways here are: -| **Name** | **Items** | **Resident** | **Ops/sec** | **RAM Used / Quota** | **Disk Used** | -|---------------|------------|---------------|---------------|-----------------------|---------------| -| benchmark | 10,000 | 100% | 227,981.1 | 36.8 MiB / 1 GiB | 26.7 MiB | + - The benchmark achieved a throughput of 227,981.1 operations per second, demonstrating strong performance on Arm64. + - The 100 percent resident ratio indicates that all data was served from memory, which minimized disk access. + - Resource usage remained low, with only 36.8 MiB of RAM and 26.7 MiB of disk consumed, both well within the allocated quotas. + - Overall, Couchbase on this Arm64 VM delivered efficient, high-speed operations while using minimal resources. -- **Operations per Second:** 227,981.1 ops/sec — indicates high throughput -- **Resident Ratio:** 100% — all data served directly from memory -- **RAM Usage:** 36.8 MiB used out of 1 GiB quota — highly efficient memory utilization -- **Disk Usage:** 26.7 MiB — minimal disk consumption -- **Overall:** The Couchbase bucket performed efficiently with strong in-memory performance and low resource usage. +You can use these results as a baseline for further tuning or to compare with other VM sizes and configurations. diff --git a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/firewall_setup.md b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/firewall_setup.md index 83cc7be2df..dd6e350a73 100644 --- a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/firewall_setup.md +++ b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/firewall_setup.md @@ -1,5 +1,5 @@ --- -title: Create a Firewall Rule on GCP +title: Create a firewall rule on GCP weight: 3 ### FIXED, DO NOT MODIFY @@ -8,35 +8,45 @@ layout: learningpathall ## Overview -In this section, you will learn how to create a Firewall Rule within Google Cloud Console. For this learning path, we need to expose TCP port 8091. +In this section you'll learn how to create a firewall rule in Google Cloud Console to allow traffic on TCP port 8091. This step is required for the Learning Path to ensure your Couchbase deployment is accessible. {{% notice Note %}} -For support on GCP setup, see the Learning Path [Getting started with Google Cloud Platform](https://learn.arm.com/learning-paths/servers-and-cloud-computing/csp/google/). 
+If you need help setting up Google Cloud Platform (GCP), see the Learning Path [Getting started with Google Cloud Platform](/learning-paths/servers-and-cloud-computing/csp/google/). {{% /notice %}} -## Create a Firewall Rule in GCP +## Create a firewall rule in GCP +To expose TCP port 8091 for Couchbase, start by creating a new firewall rule in Google Cloud Console: -For this learning path, we need to expose TCP port 8091. To accomplish this, we first need to create a firewall rule. -- Navigate to the [Google Cloud Console](https://console.cloud.google.com/). -- Go to **VPC Network > Firewall** and press **Create firewall rule**. +- Open the [Google Cloud Console](https://console.cloud.google.com/). +- In the navigation menu, select **VPC network** > **Firewall**. +- Select **Create firewall rule**. -![Create a firewall rule](images/firewall-rule.png "Create a firewall rule") +You'll use this rule to allow incoming traffic on TCP port 8091, which is required for Couchbase access on your Arm-based VM. -- Next, we create the firewall rule that will expose TCP port 8091 for our learning path. -- Set the "Name" of the new rule to "allow-tcp-8091" -- Select your network that you intend to bind to your VM (default is "autoscaling-net" but your organization might have others that you need to use) -- Direction of traffic should be set to "Ingress" -- Allow on match should be set to "Allow" and the "Targets" should be set to "Specified target tags". -- Enter "allow-tcp-8091" to the "Target tags" text field -- Set the "Source IPv4 ranges" text value to "0.0.0.0/0" +![Google Cloud Console showing the Create firewall rule page with fields for Name set to allow-tcp-8091, Network dropdown, Direction set to Ingress, Action set to Allow, Targets set to Specified target tags with allow-tcp-8091 entered, and Source IPv4 ranges set to 0.0.0.0/0. The interface is clean and organized, focusing on configuring firewall settings for a virtual machine. The overall tone is neutral and instructional. alt-text#center](images/firewall-rule.png "Create a firewall rule") -![Create a firewall rule](images/network-rule.png "Creating the TCP/8091 firewall rule") +- Set **Name** to `allow-tcp-8091`. +- Select the network you want to use for your VM. The default is `autoscaling-net`, but your organization might use a different network. +- Set **Direction of traffic** to **Ingress**. +- Set **Action on match** to **Allow**. +- For **Targets**, select **Specified target tags** and enter `allow-tcp-8091` in the **Target tags** field. +- In **Source IPv4 ranges**, enter `0.0.0.0/0`. -- Lastly, we select "Specified protocols and ports" under the "Protocols and ports" section -- Select the "TCP" checkbox -- Enter "8091" in the "Ports" text field -- Press "Create" +This configuration allows incoming TCP traffic on port 8091 from any IPv4 address. -![Specifying the TCP port to expose](images/network-port.png "Specifying the TCP port to expose") +![Google Cloud Console interface displaying the Create firewall rule page. The main section shows fields for Name set to allow-tcp-8091, Network dropdown, Direction set to Ingress, Action set to Allow, Targets set to Specified target tags with allow-tcp-8091 entered, and Source IPv4 ranges set to 0.0.0.0/0. The environment is a clean, organized web dashboard focused on configuring firewall settings for a virtual machine. The tone is neutral and instructional. All visible text is transcribed in the description. 
alt-text #center](images/network-rule.png "Creating the TCP/8091 firewall rule") -Our network firewall rule is now created so we can continue with the VM creation! \ No newline at end of file +## Specify protocols and ports + +Next, configure the protocols and ports for your firewall rule: + +- Under **Protocols and ports**, select **Specified protocols and ports**. +- Check the **TCP** box. +- In the **Ports** field, enter `8091`. +- Select **Create** to finish adding the firewall rule. + +This step ensures that only TCP traffic on port 8091 is allowed through the firewall. + +![Google Cloud Console showing the Protocols and ports section of the Create firewall rule page. The TCP checkbox is selected and the Ports field contains 8091. The interface is part of a clean, organized web dashboard for configuring firewall settings. Visible text includes Protocols and ports, Specified protocols and ports, TCP, and Ports 8091. The tone is neutral and instructional. alt-text#center](images/network-port.png "Specifying the TCP port to expose") + +Your network firewall rule has now been created. You're ready to continue with VM creation. \ No newline at end of file diff --git a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/installation.md b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/installation.md index afa13edf7d..a26179295e 100644 --- a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/installation.md +++ b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/installation.md @@ -7,26 +7,26 @@ layout: learningpathall --- ## Install Couchbase on GCP VM -This section explains how to install and configure **Couchbase Server** on a GCP Linux VM (SUSE or RHEL-based). -Follow the steps below carefully to ensure a successful setup. +This section walks you through how to install and configure Couchbase Server on a GCP Linux VM (SUSE or RHEL-based). -### System Preparation -Before installing Couchbase, update the system and install the required tools. +To ensure a successful setup, follow each step in order and check the output after each command. This helps you catch issues early and confirms that Couchbase is installed and running correctly. + +## Set up your environment +Before installing Couchbase, update the system and install the required tools: ```console sudo zypper refresh sudo zypper update -y sudo zypper install -y curl wget net-tools lsb-release ``` -### Download Couchbase Server -Download the Couchbase Server package for ARM64 architecture. +## Download Couchbase server +Download the Couchbase server package for ARM64 architecture. ```console cd ~ wget -O couchbase-server-8.0.0-linux.aarch64.rpm \ https://packages.couchbase.com/releases/8.0.0/couchbase-server-community-8.0.0-linux.aarch64.rpm ``` -**Verify the downloaded file:** After downloading, verify that the file exists and check its size. ```console @@ -34,16 +34,17 @@ ls -lh couchbase-server-8.0.0-linux.aarch64.rpm ``` This helps confirm the file was downloaded correctly and not truncated or corrupted. -### Install Couchbase Server -Install the downloaded Couchbase RPM package. +## Install Couchbase server + +Now that you've downloaded the Couchbase Server RPM, install it using the following command. 
This step sets up Couchbase and prepares all required directories, binaries, and services: ```console sudo rpm -ivh couchbase-server-8.0.0-linux.aarch64.rpm ``` -- **rpm -ivh** → Installs the RPM package, displaying verbose output and progress (v for verbose, h for hash marks). -- This command installs Couchbase and sets up the necessary directories, binaries, and services. -**Confirm that Couchbase has been installed successfully:** +The `rpm -ivh` command installs the package, shows verbose output, and displays progress with hash marks. If the installation completes without errors, Couchbase Server is ready for configuration. + +Confirm that Couchbase has been installed successfully: ```console rpm -qa | grep couchbase @@ -52,15 +53,15 @@ You should see an output similar to: ```output couchbase-server-community-8.0.0-3777.aarch64 ``` -### Start Couchbase Service -Start and enable the Couchbase service so that it runs automatically on startup. +## Start Couchbase service +Start and enable the Couchbase service so that it runs automatically on startup: ```console sudo systemctl start couchbase-server sudo systemctl enable couchbase-server ``` -**Verify service status:** +## Verify service status: ```console sudo systemctl status couchbase-server ``` @@ -70,25 +71,36 @@ You should see the following snippet as part of your output: Active: active(running) since YYY XXXX-XX-XX ``` -### Check Required Ports -This command checks if those ports are open and active. If you see “LISTEN” next to these ports, it means Couchbase is ready to accept connections. +## Check required ports -Couchbase uses the following ports for basic operation: +To confirm Couchbase is ready to accept connections, check that the required ports are open and listening. If you see "LISTEN" next to these ports, Couchbase is running and network services are available. -- Web Console: `8091` -- Query Service: `8093` (optional for N1QL queries) -- Data Service: `11210` +Couchbase uses these ports for core functions: -Check if the ports are listening: +- Web console: `8091` +- Data service: `11210` +- Query service: `8093` (for N1QL queries, optional) + +Run the following command to verify the ports are active: ```console -sudo ss -tuln | grep -E '8091|11210' +sudo ss -tuln | grep -E '8091|11210|8093' +``` + +The output is similar to: + +```output +tcp LISTEN 0 128 0.0.0.0:8091 0.0.0.0:* +tcp LISTEN 0 1024 0.0.0.0:11210 0.0.0.0:* +tcp LISTEN 0 1024 [::]:11210 [::]:* ``` +If you see "LISTEN" for these ports, Couchbase is ready for baseline testing and further configuration. This confirms that the core Couchbase services are running and accessible on your Arm-based GCP VM. + ```output tcp LISTEN 0 128 0.0.0.0:8091 0.0.0.0:* tcp LISTEN 0 1024 0.0.0.0:11210 0.0.0.0:* tcp LISTEN 0 1024 [::]:11210 [::]:* ``` -Once the **installation and setup are complete**, you can now proceed to the **baseline testing** phase. +Once you've finished installing and setting up Couchbase, you're ready to move on to baseline testing. This next phase checks that your Couchbase Server is running correctly and ready for use. 
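+
+As an optional final check before moving on, you can confirm that the Web Console endpoint answers locally. This is a minimal sketch; the exact HTTP status code can vary by Couchbase version, but any `2xx` or `3xx` response confirms the console is being served on port 8091:
+
+```console
+curl -s -o /dev/null -w "%{http_code}\n" http://localhost:8091/
+```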
diff --git a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/instance.md b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/instance.md index 11cd46284a..afe7a52935 100644 --- a/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/instance.md +++ b/content/learning-paths/servers-and-cloud-computing/couchbase-on-gcp/instance.md @@ -24,26 +24,26 @@ To create a virtual machine based on the C4A instance type: - Set **Series** to `C4A`. - Select `c4a-standard-4` for machine type. - ![Create a Google Axion C4A Arm virtual machine in the Google Cloud Console with c4a-standard-4 selected alt-text#center](images/gcp-vm.png "Creating a Google Axion C4A Arm virtual machine in Google Cloud Console") +![Create a Google Axion C4A Arm virtual machine in the Google Cloud Console with c4a-standard-4 selected alt-text#center](images/gcp-vm.png "Creating a Google Axion C4A Arm virtual machine in Google Cloud Console") -- Under **OS and Storage**, select **Change**, then choose an Arm64-based OS image. For this Learning Path, use **SUSE Linux Enterprise Server**. -- If using use **SUSE Linux Enterprise Server**. Select "Pay As You Go" for the license type. -- Once appropriately selected, please Click **Select**. +- Under **OS and Storage**, select **Change**. Choose an Arm64-based OS image. For this Learning Path, select **SUSE Linux Enterprise Server**. +- For the license type, select **Pay As You Go**. +- Click **Select** to confirm your OS choice. - Under **Networking**, enable **Allow HTTP traffic**. -- Also under **Networking**, in the "Network tags" text field add "allow-tcp-8091" as an additional tag +- In the **Network tags** field, add `allow-tcp-8091` to allow Couchbase traffic. -![Adding the TCP/8091 firewall rule to our VM](images/network-config.png "Adding the TCP/8091 firewall rule to our VM") +![Screenshot showing the Google Cloud Console interface with the Networking tab open for a VM instance. The primary subject is the configuration of a firewall rule allowing TCP traffic on port 8091. The Network tags field contains allow-tcp-8091. The wider environment includes other VM configuration options and navigation menus. The tone is neutral and instructional. Visible text includes Network tags and allow-tcp-8091. alt-text#center](images/network-config.png "Adding the TCP/8091 firewall rule to our VM") -- Click **Create** to launch the instance. -- Once created, you should see a "SSH" option to the right in your list of VM instances. You should also see the public IP address for your VM. -- Save off the public IP address for your VM as you will need this in the next step. -- Click on this to launch a SSH shell into your VM instance: +- Select **Create** to launch your VM instance. +- After the VM is ready, you'll see an **SSH** button next to your instance in the VM list. The public IP address for your VM also appears here. +- Copy the public IP address—you'll need it later to connect to Couchbase. +- Select **SSH** to open a shell session directly in your browser. -![Invoke a SSH session via your browser alt-text#center](images/gcp-pubip-ssh.png "Invoke a SSH session into your running VM instance") +![Screenshot showing the Google Cloud Console interface with the SSH option highlighted for a running VM instance. The primary subject is the SSH button next to the VM, indicating how to open a shell session in the browser. The wider environment includes the VM instance list, navigation menus, and status indicators. 
Visible text includes SSH and the public IP address for the VM. The tone is neutral and instructional. alt-text#center](images/gcp-pubip-ssh.png "Invoke a SSH session into your running VM instance") -- A window from your browser should come up and you should now see a shell into your VM instance: +A window from your browser should come up and you should now see a shell into your VM instance: -![Terminal Shell in your VM instance alt-text#center](images/gcp-shell.png "Terminal shell in your VM instance") +![Screenshot of a terminal shell open in a browser window, displaying a command prompt for a SUSE Linux Enterprise Server VM instance on Google Cloud Platform. The primary subject is the active shell session, ready for user input. The wider environment includes browser interface elements and navigation menus. Visible text includes the command prompt and system information. The tone is neutral and instructional. alt-text#center](images/gcp-shell.png "Terminal shell in your VM instance") Next, let's install Couchbase! \ No newline at end of file diff --git a/content/learning-paths/servers-and-cloud-computing/django-on-gcp/instance.md b/content/learning-paths/servers-and-cloud-computing/django-on-gcp/instance.md index 3291e8d4bb..e708afde53 100644 --- a/content/learning-paths/servers-and-cloud-computing/django-on-gcp/instance.md +++ b/content/learning-paths/servers-and-cloud-computing/django-on-gcp/instance.md @@ -46,4 +46,4 @@ To create a virtual machine based on the C4A instance type: ![Terminal Shell in your VM instance alt-text#center](images/gcp-shell.png "Terminal shell in your VM instance") -Next, let's install Couchbase! \ No newline at end of file +Next, let's install django! \ No newline at end of file diff --git a/content/learning-paths/servers-and-cloud-computing/dlrm/_index.md b/content/learning-paths/servers-and-cloud-computing/dlrm/_index.md index 555b262e18..2dcdc1bec4 100644 --- a/content/learning-paths/servers-and-cloud-computing/dlrm/_index.md +++ b/content/learning-paths/servers-and-cloud-computing/dlrm/_index.md @@ -14,6 +14,7 @@ prerequisites: - Any [Arm-based instance](/learning-paths/servers-and-cloud-computing/csp/) from a cloud service provider (CSP), or an on-premise Arm server with at least 400GB of RAM and 800 GB of disk space. author: + - Phalani Paladugu - Annie Tallund - Pareena Verma diff --git a/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/_index.md b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/_index.md new file mode 100644 index 0000000000..593e1b8019 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/_index.md @@ -0,0 +1,64 @@ +--- +title: Deploy Helm on Google Cloud C4A (Arm-based Axion VMs) + +minutes_to_complete: 30 + +draft: true +cascade: + draft: true + +who_is_this_for: This learning path is intended for software developers deploying and optimizing Helm on Linux/Arm64 environments, specifically using Google Cloud C4A virtual machines powered by Axion processors. 
+ +learning_objectives: + - Provision an Arm-based SUSE SLES virtual machine on Google Cloud (C4A with Axion processors) + - Install Helm and kubectl on a SUSE Arm64 (C4A) instance + - Create and validate a local Kubernetes cluster (KinD) on Arm64 + - Verify Helm functionality by performing install, upgrade, and uninstall workflows + - Benchmark Helm concurrency behavior using parallel Helm CLI operations on Arm64 + +prerequisites: + - A [Google Cloud Platform (GCP)](https://cloud.google.com/free) account with billing enabled + - Basic familiarity with [Kubernetes concepts](https://kubernetes.io/docs/concepts/) + - Basic understanding of [Helm](https://helm.sh/docs/topics/architecture/) and Kubernetes manifests + +author: Pareena Verma + +##### Tags +skilllevels: Introductory +subjects: Containers and Virtualization +cloud_service_providers: Google Cloud + +armips: + - Neoverse + +tools_software_languages: + - Helm + - Kubernetes + - KinD + +operatingsystems: + - Linux + +# ================================================================================ +# FIXED, DO NOT MODIFY +# ================================================================================ +further_reading: + - resource: + title: Google Cloud documentation + link: https://cloud.google.com/docs + type: documentation + + - resource: + title: Helm documentation + link: https://helm.sh/docs/ + type: documentation + + - resource: + title: Kubernetes documentation + link: https://kubernetes.io/docs/ + type: documentation + +weight: 1 +layout: "learningpathall" +learning_path_main_page: "yes" +--- diff --git a/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/_next-steps.md b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/_next-steps.md new file mode 100644 index 0000000000..c3db0de5a2 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/_next-steps.md @@ -0,0 +1,8 @@ +--- +# ================================================================================ +# FIXED, DO NOT MODIFY THIS FILE +# ================================================================================ +weight: 21 # Set to always be larger than the content in this path to be at the end of the navigation. +title: "Next Steps" # Always the same, html page title. +layout: "learningpathall" # All files under learning paths have this same wrapper for Hugo processing. +--- diff --git a/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/background.md b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/background.md new file mode 100644 index 0000000000..92424cf680 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/background.md @@ -0,0 +1,27 @@ +--- +title: Getting started with Helm on Google Axion C4A (Arm Neoverse-V2) + +weight: 2 + +layout: "learningpathall" +--- + +## Google Axion C4A Arm instances in Google Cloud + +Google Axion C4A is a family of Arm-based virtual machines built on Google’s custom Axion CPU, which is based on Arm Neoverse-V2 cores. Designed for high-performance and energy-efficient computing, these virtual machines offer strong performance for modern cloud workloads such as CI/CD pipelines, microservices, media processing, and general-purpose applications. + +The C4A series provides a cost-effective alternative to x86 virtual machines while leveraging the scalability and performance benefits of the Arm architecture in Google Cloud. 
+ +To learn more about Google Axion, refer to the [Introducing Google Axion Processors, our new Arm-based CPUs](https://cloud.google.com/blog/products/compute/introducing-googles-new-arm-based-cpu) blog. + +## Helm + +Helm is the package manager for Kubernetes that simplifies application deployment, upgrades, rollbacks, and lifecycle management using reusable **charts**. + +It allows teams to deploy applications consistently across environments and automate Kubernetes workflows. + +Helm runs as a lightweight CLI and integrates directly with the Kubernetes API, making it well-suited for Arm-based platforms such as Google Axion C4A. + +It works efficiently on both x86 and Arm64 architectures and is widely used in production Kubernetes environments. + +Learn more at the official [Helm website](https://helm.sh/). diff --git a/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/baseline.md b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/baseline.md new file mode 100644 index 0000000000..b74848cf0f --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/baseline.md @@ -0,0 +1,111 @@ +--- +title: Helm Baseline Testing on Google Axion C4A Arm Virtual Machine +weight: 5 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Helm Baseline Testing on GCP SUSE VMs +This guide walks you through baseline testing to confirm that Helm works correctly on an Arm64-based Kubernetes cluster by validating core workflows such as install, upgrade, and uninstall. + +### Add Helm Repository +Add the Bitnami Helm chart repository and update the local index: + +```console +helm repo add bitnami https://charts.bitnami.com/bitnami +helm repo update +``` + +You should see an output similar to: +```output +"bitnami" has been added to your repositories +Hang tight while we grab the latest from your chart repositories... +...Successfully got an update from the "bitnami" chart repository +Update Complete. ⎈Happy Helming!⎈ +``` + +### Install a Sample Application +Install a sample NGINX application using a Helm chart: + +```console +helm install nginx bitnami/nginx +``` +Deploy a simple test app to validate that Helm can create releases on the cluster. + +You should see an output that contains text similar to this (please ignore any WARNINGS you receive): +```output +NAME: nginx +LAST DEPLOYED: Wed Dec 3 07:34:04 2025 +NAMESPACE: default +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +CHART NAME: nginx +CHART VERSION: 22.3.3 +APP VERSION: 1.29.3 +``` + + +### Validate Deployment +Verify that the Helm release is created: + +```console +helm list +``` + +Confirm Helm recorded the release and that the deployment exists. + +You should see an output similar to: +```output +NAME NAMESPACE REVISION UPDATED STATUS CHART APP VERSION +nginx default 1 2025-12-09 21:04:15.944165326 +0000 UTC deployed nginx-22.3.3 1.29.3 +``` + +Check Kubernetes resources: + +```console +kubectl get pods +kubectl get svc +``` +You should see an output similar to: +```output +NAME READY STATUS RESTARTS AGE +nginx-7b9564dc4b-2ghkw 1/1 Running 0 3m5s + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +kubernetes ClusterIP 10.96.0.1 443/TCP 4m28s +nginx LoadBalancer 10.96.216.137 80:32708/TCP,443:31052/TCP 3m6s +``` +All pods should be in the **Running** state. If the pods are in **Pending** state, please wait a bit and retry the commands above. + + +### Validate Helm Lifecycle +This step confirms that Helm supports the full application lifecycle on Arm64. 
+ +#### Upgrade the Release + +```console +helm upgrade nginx bitnami/nginx +``` +Test Helm's ability to update an existing release to a new revision. + +You should see an output similar (towards the top of the output...) to: +```output +Release "nginx" has been upgraded. Happy Helming! +``` + +#### Uninstall the Release +Ensure Helm can cleanly remove the release and associated resources. + +```console +helm uninstall nginx +``` + +You should see an output similar to: +```output +release "nginx" uninstalled +``` +This confirms the successful execution of **install**, **upgrade**, and **delete** workflows using Helm on Arm64. +Helm is fully functional on the Arm64 Kubernetes cluster and ready for further experimentation or benchmarking. diff --git a/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/benchmarking.md b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/benchmarking.md new file mode 100644 index 0000000000..c1daaabe2e --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/benchmarking.md @@ -0,0 +1,174 @@ +--- +title: Helm Benchmarking +weight: 6 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + + +## Helm Benchmark on GCP SUSE Arm64 VM +This guide explains **how to benchmark Helm on an Arm64-based GCP SUSE VM** using only the **Helm CLI**. +Since Helm does not provide built-in performance metrics, we measure **concurrency behavior** by running multiple Helm commands in parallel and recording the total execution time. + +### Prerequisites +Before starting the benchmark, ensure Helm is installed and the Kubernetes cluster is accessible. + +```console +helm version +kubectl get nodes +``` + +All nodes should be in `Ready` state. + + +### Add Helm Repository +Helm installs applications using “charts.” +This step tells Helm where to download those charts from and updates its local chart list. + +```console +helm repo add bitnami https://charts.bitnami.com/bitnami +helm repo update +``` + +### Create Benchmark Namespace +Isolate benchmark workloads from other cluster resources. + +```console +kubectl create namespace helm-bench +``` + +### Warm-Up Run (Recommended) +This step prepares the cluster by pulling container images and initializing caches. + +```console +helm install warmup bitnami/nginx \ + -n helm-bench \ + --set service.type=ClusterIP \ + --timeout 10m +``` +The first install is usually slower because of following reasons: + +- Images must be downloaded. +- Kubernetes initializes internal objects. + +This warm-up ensures the real benchmark measures Helm performance, not setup overhead. + +You should see output (near the top of the output) that is simlar to: +```output +NAME: warmup +LAST DEPLOYED: Tue Dec 9 21:10:44 2025 +NAMESPACE: helm-bench +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +CHART NAME: nginx +CHART VERSION: 22.3.3 +APP VERSION: 1.29.3 +``` + +**After validation, remove the warm-up deployment:** + +```console +helm uninstall warmup -n helm-bench +``` + +{{% notice Note %}} +Helm does not provide native concurrency or throughput metrics. Concurrency benchmarking is performed by executing multiple Helm CLI operations in parallel and measuring overall completion time. +{{% /notice %}} + +### Concurrent Helm Install Benchmark (No Wait) +Run multiple Helm installs in parallel using background jobs. 
+ +```console +time ( +for i in {1..5}; do + helm install nginx-$i bitnami/nginx \ + -n helm-bench \ + --set service.type=ClusterIP \ + --timeout 10m & +done +wait +) +``` +This step simulates multiple teams deploying applications at the same time. +Helm submits all requests without waiting for pods to fully start. + +What this measures: + +* Helm concurrency handling +* Kubernetes API responsiveness +* Arm64 client-side performance + +You should see an output similar to: +```output +real 0m3.998s +user 0m12.798s +sys 0m0.339s +``` + +### Verify Deployments + +This confirms: + +- Helm reports that all components were installed successfully +- Kubernetes actually created and started the applications + +```console +helm list -n helm-bench +kubectl get pods -n helm-bench +``` + +Expected: + +* All releases in `deployed` state +* Pods in `Running` status + +### Concurrent Helm Install Benchmark (With `--wait`) +This benchmark includes workload readiness time. + +```console +time ( +for i in {1..3}; do + helm install nginx-wait-$i bitnami/nginx \ + -n helm-bench \ + --set service.type=ClusterIP \ + --wait \ + --timeout 15m & +done +wait +) +``` + +What this measures: + +* Helm concurrency plus scheduler and image-pull contention +* End-to-end readiness impact + +You should see an output similar to: +```output +real 0m12.924s +user 0m7.333s +sys 0m0.312s +``` + +### Metrics to Record + +- **Total elapsed time**: Overall time taken to complete all installs. +- **Number of parallel installs**: Number of Helm installs run at the same time. +- **Failures**: Any Helm failures or Kubernetes API errors. +- **Pod readiness delay**: Time pods take to become Ready (resource pressure) + +### Benchmark summary +Results from the earlier run on the `c4a-standard-4` (4 vCPU, 16 GB memory) Arm64 VM in GCP (SUSE): + +| Test Case | Parallel Installs | `--wait` Used | Timeout | Total Time (real) | +| ---------------------------- | ----------------- | ------------- | ------- | ----------------- | +| Parallel Install (No Wait) | 5 | No | 10m | **3.99 s** | +| Parallel Install (With Wait) | 3 | Yes | 15m | **12.92 s** | + +- **Arm64 shows faster Helm execution** for both warm and ready states, indicating efficient CLI and Kubernetes API handling on Arm-based GCP instances. +- **The `--wait` flag significantly increases total execution time** because Helm waits for pods and services to reach a Ready state, revealing scheduler latency and image-pull delays rather than Helm CLI overhead. +- **Parallel Helm installs scale well on Arm64**, with minimal contention observed even at higher concurrency levels. 
+- **End-to-end workload readiness dominates benchmark results**, showing that cluster resource availability and container image pulls diff --git a/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/images/gcp-shell.png b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/images/gcp-shell.png new file mode 100644 index 0000000000..7e2fc3d1b5 Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/images/gcp-shell.png differ diff --git a/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/images/gcp-ssh.png b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/images/gcp-ssh.png new file mode 100644 index 0000000000..597ccd7fea Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/images/gcp-ssh.png differ diff --git a/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/images/gcp-vm.png b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/images/gcp-vm.png new file mode 100644 index 0000000000..0d1072e20d Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/images/gcp-vm.png differ diff --git a/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/installation.md b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/installation.md new file mode 100644 index 0000000000..3d8e1828df --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/installation.md @@ -0,0 +1,125 @@ +--- +title: Install Helm +weight: 4 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Install Helm on GCP VM +This section covers preparing a SUSE Arm64 system and installing the required tools to work with Helm using a local Kubernetes cluster created with KinD. + +### System Preparation +Update the system and install basic dependencies: + +```console +sudo zypper refresh +sudo zypper update -y +sudo zypper install -y curl git tar gzip +``` +### Enable SUSE Containers Module +This enables SUSE’s official container support, so Docker and container tools can work properly. +``` console +sudo SUSEConnect -p sle-module-containers/15.5/arm64 +sudo SUSEConnect --list-extensions | grep Containers +``` +You should see "Activated" as part of the output from the above commands. + +### Install Docker +Docker is required to run KinD and Kubernetes components as containers. This step installs Docker, starts it, and allows your user to run Docker without sudo. +``` console +sudo zypper refresh +sudo zypper install -y docker +sudo systemctl enable --now docker +sudo usermod -aG docker $USER +exit +``` + +Next, re-open a new shell into your VM and type the following: + +```console +docker ps +``` + +You should see the following output: + +```output +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +``` + +### Install kubectl +This step installs kubectl, the command-line tool used to interact with Kubernetes clusters, compiled for the Arm64 architecture. + +```console +curl -LO https://dl.k8s.io/release/v1.30.1/bin/linux/arm64/kubectl +chmod +x kubectl +sudo mv kubectl /usr/local/bin/ +``` + +### Verify Installation + +This step confirms that `kubectl` is installed correctly and accessible from the command line. 
+ +```console +kubectl version --client +``` + +You should see an output similar to: +```output +Client Version: v1.30.1 +Kustomize Version: v5.0.4-0.20230601165947-6ce0bf390ce3 +``` + +### Install Helm +This step installs Helm using the official Helm installation script, ensuring you get a verified and up-to-date release. + +```console +curl -sSfL https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 > get_helm.sh +chmod 755 ./get_helm.sh +./get_helm.sh +``` + +### Verify Installation +This step confirms that Helm is installed correctly and ready to be used. + +```console +helm version +``` + +You should see an output similar to: +```output +version.BuildInfo{Version:"v3.19.2", GitCommit:"8766e718a0119851f10ddbe4577593a45fadf544", GitTreeState:"clean", GoVersion:"go1.24.9"} +``` + +### Create a Local Kubernetes Cluster (KinD) +This step installs KinD (Kubernetes-in-Docker), which allows you to run a lightweight Kubernetes cluster locally on your Arm64 VM. + +```console +curl -Lo kind https://kind.sigs.k8s.io/dl/v0.30.0/kind-linux-arm64 +chmod +x kind +sudo mv kind /usr/local/bin/ +``` + +**Create a local Kubernetes cluster:** + +This step creates a local Kubernetes cluster named helm-lab that will be used to deploy Helm charts. + +```console +kind create cluster --name helm-lab +``` + +### Verify Cluster Status +This step verifies that the Kubernetes cluster is operating correctly and is fully prepared to run workloads. + +```console +kubectl get nodes +``` + +You should see an output similar to: +```output +NAME STATUS ROLES AGE VERSION +helm-lab-control-plane Ready control-plane 23h v1.34.0 +``` +The node should be in the **Ready** state. If not, please retry the command again. + +You now have a fully working local Kubernetes environment on Arm64, ready for deploying applications using Helm. diff --git a/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/instance.md b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/instance.md new file mode 100644 index 0000000000..5a93ed9c0e --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/helm-on-gcp/instance.md @@ -0,0 +1,44 @@ +--- +title: Create a Google Axion C4A Arm virtual machine on GCP +weight: 3 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Overview + +In this section, you will learn how to provision a Google Axion C4A Arm virtual machine on Google Cloud Platform (GCP) using the `c4a-standard-4` (4 vCPUs, 16 GB memory) machine type in the Google Cloud Console. + +{{% notice Note %}} +For support on GCP setup, see the Learning Path [Getting started with Google Cloud Platform](https://learn.arm.com/learning-paths/servers-and-cloud-computing/csp/google/). +{{% /notice %}} + +## Provision a Google Axion C4A Arm VM in Google Cloud Console + +To create a virtual machine based on the C4A instance type: +- Navigate to the [Google Cloud Console](https://console.cloud.google.com/). +- Go to **Compute Engine > VM Instances** and select **Create Instance**. +- Under **Machine configuration**: + - Populate fields such as **Instance name**, **Region**, and **Zone**. + - Set **Series** to `C4A`. + - Select `c4a-standard-4` for machine type. + + ![Create a Google Axion C4A Arm virtual machine in the Google Cloud Console with c4a-standard-4 selected alt-text#center](images/gcp-vm.png "Creating a Google Axion C4A Arm virtual machine in Google Cloud Console") + + +- Under **OS and Storage**, select **Change**, then choose an Arm64-based OS image. 
For this Learning Path, use **SUSE Linux Enterprise Server**. +- If using use **SUSE Linux Enterprise Server**. Select "Pay As You Go" for the license type. +- Edit the Disk size ("Size(GB)" Textfield...) below and change it from "10" to "50" to increase the disk size of the VM to 50 GB... +- Once appropriately selected and configured, please Click **Select**. +- Under **Networking**, enable **Allow HTTP traffic** as well as **Allow HTTPS traffic**. +- Click **Create** to launch the instance. +- Once created, you should see a "SSH" option to the right in your list of VM instances. Click on this to launch a SSH shell into your VM instance: + +![Invoke a SSH session via your browser alt-text#center](images/gcp-ssh.png "Invoke a SSH session into your running VM instance") + +- A window from your browser should come up and you should now see a shell into your VM instance: + +![Terminal Shell in your VM instance alt-text#center](images/gcp-shell.png "Terminal shell in your VM instance") + +Next, let's install Helm! \ No newline at end of file diff --git a/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/_index.md b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/_index.md new file mode 100644 index 0000000000..72051a667f --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/_index.md @@ -0,0 +1,64 @@ +--- +title: Deploy RabbitMQ on Google Cloud C4A (Arm-based Axion VMs) + +draft: true +cascade: + draft: true + +minutes_to_complete: 30 + +who_is_this_for: This is an introductory topic for software engineers and platform engineers migrating messaging and event-driven workloads from x86_64 to Arm-based servers, specifically on Google Cloud C4A virtual machines powered by Axion processors. + +learning_objectives: + - Provision an Arm-based SUSE SLES virtual machine on Google Cloud (C4A with Axion processors) + - Install and configure RabbitMQ on a SUSE Arm64 (C4A) instance + - Validate RabbitMQ deployment using baseline messaging tests + - Implement real-world RabbitMQ use cases such as event-driven processing and notification pipelines + +prerequisites: + - A [Google Cloud Platform (GCP)](https://cloud.google.com/free) account with billing enabled + - Basic understanding of message queues and messaging concepts (publishers, consumers) + - Familiarity with Linux command-line operations + +author: Pareena Verma + +##### Tags +skilllevels: Introductory +subjects: Databases +cloud_service_providers: Google Cloud + +armips: + - Neoverse + +tools_software_languages: + - RabbitMQ + - Erlang + - Python + - pika + +operatingsystems: + - Linux + +# ================================================================================ +# FIXED, DO NOT MODIFY +# ================================================================================ +further_reading: + - resource: + title: Google Cloud documentation + link: https://cloud.google.com/docs + type: documentation + + - resource: + title: RabbitMQ documentation + link: https://www.rabbitmq.com/documentation.html + type: documentation + + - resource: + title: RabbitMQ Tutorials + link: https://www.rabbitmq.com/getstarted.html + type: documentation + +weight: 1 +layout: "learningpathall" +learning_path_main_page: "yes" +--- diff --git a/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/_next-steps.md b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/_next-steps.md new file mode 100644 index 0000000000..c3db0de5a2 --- /dev/null +++ 
b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/_next-steps.md @@ -0,0 +1,8 @@ +--- +# ================================================================================ +# FIXED, DO NOT MODIFY THIS FILE +# ================================================================================ +weight: 21 # Set to always be larger than the content in this path to be at the end of the navigation. +title: "Next Steps" # Always the same, html page title. +layout: "learningpathall" # All files under learning paths have this same wrapper for Hugo processing. +--- diff --git a/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/background.md b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/background.md new file mode 100644 index 0000000000..1d839a0496 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/background.md @@ -0,0 +1,25 @@ +--- +title: Getting started with RabbitMQ on Google Axion C4A (Arm Neoverse-V2) + +weight: 2 + +layout: "learningpathall" +--- + +## Google Axion C4A Arm instances in Google Cloud + +Google Axion C4A is a family of Arm-based virtual machines built on Google’s custom Axion CPU, which is based on Arm Neoverse-V2 cores. Designed for high-performance and energy-efficient computing, these virtual machines offer strong performance for modern cloud workloads such as CI/CD pipelines, microservices, media processing, and general-purpose applications. + +The C4A series provides a cost-effective alternative to x86 virtual machines while leveraging the scalability and performance benefits of the Arm architecture in Google Cloud. + +To learn more about Google Axion, refer to the [Introducing Google Axion Processors, our new Arm-based CPUs](https://cloud.google.com/blog/products/compute/introducing-googles-new-arm-based-cpu) blog. + +## RabbitMQ + +RabbitMQ is an open-source **message broker** that enables applications to communicate asynchronously using messaging patterns such as queues, publish/subscribe, and routing. It acts as an intermediary that reliably receives, stores, and forwards messages between producers and consumers. + +RabbitMQ helps decouple application components, improve scalability, and increase fault tolerance by ensuring messages are not lost and can be processed independently. It supports multiple messaging protocols, including AMQP, and provides features such as message durability, acknowledgments, routing via exchanges, and flexible delivery guarantees. + +RabbitMQ is widely used for **event-driven architectures**, **background job processing**, **microservices communication**, and **notification systems**. It integrates easily with many programming languages and platforms. + +Learn more from the [RabbitMQ official website](https://www.rabbitmq.com/) and the [official documentation](https://www.rabbitmq.com/documentation.html). 
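+
+To make the producer, consumer, and routing concepts above concrete, the following is a minimal sketch of routing a message through a named exchange using the `rabbitmqadmin` tool that you download later in this Learning Path. The exchange, queue, and routing key names are illustrative:
+
+```console
+./rabbitmqadmin declare exchange name=orders type=direct
+./rabbitmqadmin declare queue name=order.created durable=false
+./rabbitmqadmin declare binding source=orders destination=order.created destination_type=queue routing_key=created
+./rabbitmqadmin publish exchange=orders routing_key=created payload="order 1001 created"
+./rabbitmqadmin get queue=order.created
+```
+
+A producer publishes to the `orders` exchange with the routing key `created`; the binding routes the message to the `order.created` queue, where a consumer retrieves it.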
diff --git a/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/baseline.md b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/baseline.md new file mode 100644 index 0000000000..b6f74f264e --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/baseline.md @@ -0,0 +1,138 @@ +--- +title: RabbitMQ Baseline Testing on Google Axion C4A Arm Virtual Machine +weight: 5 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## RabbitMQ Baseline Validation on GCP SUSE Arm64 VM +This document defines a **baseline validation procedure** for RabbitMQ installed on a **Google Cloud SUSE Linux Arm64 virtual machine**. +The purpose of this baseline is to confirm: + +- RabbitMQ service health +- Management plugin availability +- Queue operations (create, publish, consume) +- CLI tooling functionality (`rabbitmqctl` and `rabbitmqadmin`) + +### Check RabbitMQ Node Status +Verify that the RabbitMQ node is operational and healthy. + +```console +sudo rabbitmqctl status +``` +- Node status reports RabbitMQ is running +- No active alarms +- Listeners are active on ports 5672 and 15672 +- Memory and disk space are within safe limits + +### Verify Enabled Plugins +Confirm that the RabbitMQ management plugins are enabled. + +```console +sudo rabbitmq-plugins list | grep management +``` + +You should see an output similar to: +```output +[ ] rabbitmq_federation_management 4.2.0 +[E*] rabbitmq_management 4.2.0 +[e*] rabbitmq_management_agent 4.2.0 +[ ] rabbitmq_shovel_management 4.2.0 +[ ] rabbitmq_stream_management 4.2.0 +``` + +### Validate RabbitMQ Listeners +Ensure RabbitMQ is listening on the required ports. + +```console +sudo rabbitmqctl status | grep -A5 Listeners +``` + +You should see an output similar to: +```output +Listeners + +Interface: [::], port: 15672, protocol: http, purpose: HTTP API +Interface: [::], port: 25672, protocol: clustering, purpose: inter-node and CLI tool communication +Interface: [::], port: 5672, protocol: amqp, purpose: AMQP 0-9-1 and AMQP 1.0 +``` + +### Download RabbitMQ Admin CLI Tool +Download the rabbitmqadmin CLI tool from the local management endpoint. + +```console +curl -u guest:guest http://localhost:15672/cli/rabbitmqadmin -o rabbitmqadmin +``` +**Make the tool executable:** + +```console +chmod +x rabbitmqadmin +``` +### Validate Queue Creation +Create a test queue to validate write operations. + +```console +./rabbitmqadmin declare queue name=testqueue durable=false +``` + +You should see an output similar to: +```output +queue declared +``` + +### Publish a Test Message +Send a test message to the queue. + +```console +./rabbitmqadmin publish exchange=amq.default routing_key=testqueue payload="hello world" +``` + +You should see an output similar to: +```output +Message published +``` + +### Consume Message From Queue +Retrieve messages from the queue to verify read functionality. 
+
+```console
+./rabbitmqadmin get queue=testqueue
+```
+
+You should see an output similar to:
+```output
++-------------+----------+---------------+-------------+---------------+------------------+------------+-------------+
+| routing_key | exchange | message_count | payload     | payload_bytes | payload_encoding | properties | redelivered |
++-------------+----------+---------------+-------------+---------------+------------------+------------+-------------+
+| testqueue   |          | 0             | hello world | 11            | string           |            | False       |
++-------------+----------+---------------+-------------+---------------+------------------+------------+-------------+
+```
+
+### Verify Queue State
+List the queues and their message counts. By default, `rabbitmqadmin get` requeues the message it retrieves, so `testqueue` can still show one message after the previous step.
+
+```console
+./rabbitmqadmin list queues name messages
+```
+
+You should see an output similar to:
+```output
++--------------+----------+
+| name         | messages |
++--------------+----------+
+| jobs         | 0        |
+| order.events | 1        |
+| testqueue    | 1        |
++--------------+----------+
+```
+
+### Baseline Validation Summary
+
+- RabbitMQ node is running and healthy
+- The management plugin is enabled and accessible
+- Queue creation is successful
+- Message publishing works as expected
+- Message retrieval functions correctly
+- CLI tools operate without error
+
+This confirms a successful baseline validation of RabbitMQ on a GCP SUSE Arm64 virtual machine.
diff --git a/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/images/gcp-vm.png b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/images/gcp-vm.png new file mode 100644 index 0000000000..0d1072e20d Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/images/gcp-vm.png differ
diff --git a/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/images/rabbitmq.png b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/images/rabbitmq.png new file mode 100644 index 0000000000..675dd72d9c Binary files /dev/null and b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/images/rabbitmq.png differ
diff --git a/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/installation.md b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/installation.md new file mode 100644 index 0000000000..9bc7a1e67d --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/installation.md @@ -0,0 +1,144 @@
+---
+title: Install RabbitMQ
+weight: 4
+
+### FIXED, DO NOT MODIFY
+layout: learningpathall
+---
+
+## Install RabbitMQ on GCP SUSE Arm64 VM
+This guide describes a **step-by-step installation of RabbitMQ** on a **Google Cloud Platform SUSE Linux Arm64 virtual machine**, using **RPM packages** for both **Erlang** and **RabbitMQ Server**.
+
+RabbitMQ requires Erlang to be installed before setting up the server.
+
+### Prerequisites
+
+- GCP SUSE Linux Enterprise Server (Arm64)
+- Root or sudo privileges
+- Outbound internet access
+
+### Refresh System Repositories
+This step updates the system's package list so the operating system knows about the latest software available from its repositories.
+
+```console
+sudo zypper refresh
+```
+
+### Install Required System Utilities
+Install the basic tools needed to download and manage packages.
+
+```console
+sudo zypper install -y curl wget gnupg tar socat logrotate
+```
+
+### Download Erlang RPM (Arm64)
+RabbitMQ depends on Erlang. Download the Erlang RPM compatible with the Arm64 architecture.
+
+```console
+wget https://github.com/rabbitmq/erlang-rpm/releases/download/v26.2.5/erlang-26.2.5-1.el8.aarch64.rpm
+sudo rpm -Uvh erlang-26.2.5-1.el8.aarch64.rpm
+```
+
+### Verify Erlang Installation
+Confirm that Erlang is installed correctly.
+
+```console
+erl -eval 'io:format("~s~n", [erlang:system_info(system_version)]), halt().' -noshell
+```
+
+You should see an output similar to:
+
+```output
+Erlang/OTP 26 [erts-14.2.5] [source] [64-bit] [smp:4:4] [ds:4:4:10] [async-threads:1] [jit]
+```
+
+### Download RabbitMQ Server RPM
+Download the RabbitMQ Server RPM package.
+
+```console
+wget https://github.com/rabbitmq/rabbitmq-server/releases/download/v4.2.0/rabbitmq-server-4.2.0-1.el8.noarch.rpm
+sudo rpm -Uvh rabbitmq-server-4.2.0-1.el8.noarch.rpm
+```
+
+{{% notice Note %}}
+RabbitMQ version 3.11.0 introduced significant performance enhancements for Arm-based architectures. This version requires Erlang 25.0 or later, which brings Just-In-Time (JIT) compilation and modern flame graph profiling tooling to both x86 and Arm64 CPUs. These features result in improved performance on Arm64 architectures.
+For more details, see the [RabbitMQ 3.11.0 release notes](https://github.com/rabbitmq/rabbitmq-server/blob/main/release-notes/3.11.0.md).
+
+The [Arm Ecosystem Dashboard](https://developer.arm.com/ecosystem-dashboard/) lists RabbitMQ version 3.11.0 as the minimum recommended version on Arm platforms.
+{{% /notice %}}
+
+### Enable and Start RabbitMQ Service
+Enable RabbitMQ to start automatically on boot and start the service immediately.
+
+```console
+sudo systemctl enable rabbitmq-server --now
+```
+
+### Verify RabbitMQ Service Status
+Check the status of the RabbitMQ service.
+
+```console
+sudo systemctl status rabbitmq-server
+```
+
+The service should be in an active (running) state.
+
+### Enable RabbitMQ Management Plugin
+Enable the RabbitMQ management plugin to access the web-based dashboard.
+
+```console
+sudo rabbitmq-plugins enable rabbitmq_management
+```
+
+### Restart RabbitMQ
+Restart RabbitMQ to apply plugin changes.
+
+```console
+sudo systemctl restart rabbitmq-server
+```
+
+### Verify RabbitMQ Version
+Confirm the installed RabbitMQ version.
+
+```console
+sudo rabbitmqctl version
+```
+
+You should see an output similar to:
+
+```output
+4.2.0
+```
+
+### Access RabbitMQ Management UI
+Create a new RabbitMQ user for remote access. The default `guest` user can only log in from `localhost`, so create an `admin` user instead.
+
+**Run these commands on the VM:**
+
+```console
+sudo rabbitmqctl add_user admin StrongPassword123
+sudo rabbitmqctl set_user_tags admin administrator
+sudo rabbitmqctl set_permissions -p / admin ".*" ".*" ".*"
+```
+
+**Log in to the Management UI**
+
+- **URL**: `http://<EXTERNAL_IP>:15672`
+- **Username**: admin
+- **Password**: StrongPassword123
+
+This approach avoids exposing the default `guest` account and is closer to a production setup.
+
+Now, test it from outside the VM. Open a web browser on your local machine (Chrome, Firefox, Edge, etc.) and enter the following URL in the address bar:
+
+```console
+http://<EXTERNAL_IP>:15672
+```
+- Replace `<EXTERNAL_IP>` with the external (public) IP address of your GCP VM.
+
+If everything is set up correctly, you will see a RabbitMQ login page in your browser.
It looks like this: + +![RabbitMQ page alt-text#center](images/rabbitmq.png "Figure 1: RabbitMQ Login page") diff --git a/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/instance.md b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/instance.md new file mode 100644 index 0000000000..2b93bc950d --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/instance.md @@ -0,0 +1,31 @@ +--- +title: Create a Google Axion C4A Arm virtual machine on GCP +weight: 3 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## Overview + +In this section, you will learn how to provision a Google Axion C4A Arm virtual machine on Google Cloud Platform (GCP) using the `c4a-standard-4` (4 vCPUs, 16 GB memory) machine type in the Google Cloud Console. + +{{% notice Note %}} +For support on GCP setup, see the Learning Path [Getting started with Google Cloud Platform](https://learn.arm.com/learning-paths/servers-and-cloud-computing/csp/google/). +{{% /notice %}} + +## Provision a Google Axion C4A Arm VM in Google Cloud Console + +To create a virtual machine based on the C4A instance type: +- Navigate to the [Google Cloud Console](https://console.cloud.google.com/). +- Go to **Compute Engine > VM Instances** and select **Create Instance**. +- Under **Machine configuration**: + - Populate fields such as **Instance name**, **Region**, and **Zone**. + - Set **Series** to `C4A`. + - Select `c4a-standard-4` for machine type. + + ![Create a Google Axion C4A Arm virtual machine in the Google Cloud Console with c4a-standard-4 selected alt-text#center](images/gcp-vm.png "Creating a Google Axion C4A Arm virtual machine in Google Cloud Console") + +- Under **OS and Storage**, select **Change**, then choose an Arm64-based OS image. For this Learning Path, use **SUSE Linux Enterprise Server**. Pick the preferred version for your Operating System. Ensure you select the **Arm image** variant. Click **Select**. +- Under **Networking**, enable **Allow HTTP traffic**. +- Click **Create** to launch the instance. diff --git a/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/use-case1.md b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/use-case1.md new file mode 100644 index 0000000000..c5f4e9b90c --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/use-case1.md @@ -0,0 +1,210 @@ +--- +title: RabbitMQ Use Case 1 – Event Processing with Python Workers +weight: 6 + +### FIXED, DO NOT MODIFY +layout: learningpathall +--- + +## RabbitMQ Use Case – Event Processing with Python Workers +This use case demonstrates how RabbitMQ enables event-driven architectures using topic exchanges, durable queues, and Python-based worker consumers. It focuses on reliable, asynchronous event processing, which is a common production pattern. + +- Topic exchange–based routing +- Durable queues and bindings +- A Python-based worker using the `pika` client +- Message publishing and consumption validation + +The use case models an **event-driven system**, where order-related events are published and processed asynchronously by workers. + +### Use Case Overview + +**Scenario:** +An application publishes order-related events (`order.created`, `order.updated`, etc.) to RabbitMQ. A background worker consumes these events from a queue and processes them independently. 
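For illustration, the publishing side of such an application might look like the sketch below. It uses the Python `pika` client and assumes the `events` topic exchange that you declare later in this section, so treat it as a preview of the application code rather than a required step; the hands-on steps that follow use `rabbitmqadmin` instead.

```python
import json
import pika

# Assumes a local broker and the 'events' topic exchange declared later in this section
connection = pika.BlockingConnection(pika.ConnectionParameters(host="localhost"))
channel = connection.channel()

event = {"order_id": 123, "status": "created"}

# Publish an order.created event; the topic exchange routes it to any queue
# bound with a matching pattern such as order.*
channel.basic_publish(
    exchange="events",
    routing_key="order.created",
    body=json.dumps(event).encode(),
    properties=pika.BasicProperties(delivery_mode=2),  # mark the message persistent
)

connection.close()
```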
+
+The goal of this use case is to showcase how order-related events can be published to RabbitMQ and processed asynchronously by background workers without tightly coupling producers and consumers.
+
+**Typical events include:**
+
+- order.created
+- order.updated
+- order.completed
+
+This architecture improves scalability, fault tolerance, and system decoupling.
+
+### Prerequisites
+
+- RabbitMQ installed and running
+- RabbitMQ management plugin enabled
+- Python 3 installed
+- Network access to the RabbitMQ broker
+
+### Declare a Topic Exchange
+Create a durable topic exchange to route events based on routing keys.
+
+```console
+./rabbitmqadmin declare exchange name=events type=topic durable=true
+```
+
+- Creates a durable topic exchange named events.
+- Routes messages using wildcard-based routing keys (for example, order.*).
+- Ensures the exchange survives broker restarts.
+
+### Declare a Durable Queue
+Create a durable queue to store order-related events.
+
+```console
+./rabbitmqadmin declare queue name=order.events durable=true
+```
+
+- Creates a durable queue for order events.
+- Guarantees that messages are persisted until consumed.
+- Ensures reliability in case of worker or broker restarts.
+
+You should see an output similar to:
+```output
+queue declared
+```
+
+### Bind Queue to Exchange
+Bind the queue to the exchange using a topic routing pattern.
+
+```console
+./rabbitmqadmin declare binding source=events destination=order.events routing_key="order.*"
+```
+
+- Connects the queue to the exchange.
+- Ensures all order-related routing keys match the queue.
+- Enables flexible event expansion without changing consumers.
+
+You should see an output similar to:
+```output
+binding declared
+```
+
+This binding ensures the queue receives all messages with routing keys such as:
+- order.created
+- order.updated
+- order.completed
+
+### Publish an Event Message
+Publish a sample order event to the exchange.
+
+```console
+./rabbitmqadmin publish exchange=events routing_key="order.created" payload='{"order_id":123}'
+```
+
+- Publishes an event to the events exchange.
+- Uses a routing key that matches the binding filter.
+- Payload is structured JSON to simulate real event data.
+
+You should see an output similar to:
+```output
+Message published
+```
+
+### Install Python Dependencies
+Install pip and the pika RabbitMQ client library.
+
+```console
+sudo zypper install -y python3-pip
+pip install pika
+```
+
+### Create the Worker Script
+Create a Python worker file to process messages from a queue.
+
+The **Python worker** processes messages from a RabbitMQ queue named `jobs` (a separate work queue, distinct from the `order.events` queue declared above) using the pika library. The queue is durable, ensuring message persistence. The worker implements fair dispatch (prefetch_count=1) and manual acknowledgments to reliably process each job without loss. In the next steps, you publish messages to this queue using rabbitmqadmin and watch the worker consume them.
+
+```console
+vi worker.py
+```
+
+**worker.py:**
+
+```python
+import pika
+import time
+import json
+
+# RabbitMQ broker address
+RABBITMQ_IP = "localhost"
+
+connection = pika.BlockingConnection(
+    pika.ConnectionParameters(host=RABBITMQ_IP)
+)
+channel = connection.channel()
+
+# Ensure queue exists
+channel.queue_declare(queue='jobs', durable=True)
+
+print("Worker started. Waiting for jobs...")
+
+def process_job(ch, method, properties, body):
+    job = json.loads(body.decode())
+    print(f"[Worker] Received job: {job}")
+
+    # Simulate processing
+    time.sleep(2)
+
+    # Acknowledge message
+    ch.basic_ack(delivery_tag=method.delivery_tag)
+
+# Fair dispatch configuration
+channel.basic_qos(prefetch_count=1)
+
+channel.basic_consume(
+    queue='jobs',
+    on_message_callback=process_job
+)
+
+channel.start_consuming()
+```
+
+### Start the Worker
+Run the worker process.
+
+```console
+python3 worker.py
+```
+
+You should see an output similar to:
+```output
+Worker started. Waiting for jobs...
+```
+
+### Publish Job Messages
+From another terminal, publish a job message.
+
+```console
+./rabbitmqadmin publish routing_key=jobs payload='{"job":"test1"}'
+```
+
+**Worker output:**
+
+```output
+Worker started. Waiting for jobs...
+[Worker] Received job: {'job': 'test1'}
+```
+
+Publish another job:
+
+```console
+./rabbitmqadmin publish routing_key=jobs payload='{"job":"hello1"}'
+```
+
+**Worker output:**
+
+```output
+Worker started. Waiting for jobs...
+[Worker] Received job: {'job': 'hello1'}
+```
+
+## Use Case Validation
+
+- Event routing via topic exchanges functions correctly
+- Durable queues and acknowledgments ensure reliable message processing
+- Worker-based consumption supports safe and controlled job execution
+
+This use case demonstrates how RabbitMQ enables reliable, decoupled, and scalable event processing using topic-based routing and Python workers.
+The setup provides a strong foundation for production-grade, message-driven architectures on GCP SUSE Arm64 virtual machines.
diff --git a/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/use-case2.md b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/use-case2.md new file mode 100644 index 0000000000..1e4c58b790 --- /dev/null +++ b/content/learning-paths/servers-and-cloud-computing/rabbitmq-gcp/use-case2.md @@ -0,0 +1,305 @@
+---
+title: RabbitMQ use case 2 - WhatsApp Notification
+weight: 7
+
+### FIXED, DO NOT MODIFY
+layout: learningpathall
+---
+
+## WhatsApp Notification Use Case using RabbitMQ
+This section demonstrates a **real-world asynchronous messaging use case** where RabbitMQ is used to process WhatsApp notifications reliably using a worker-based architecture.
+
+### Use Case Overview
+
+In many production systems, sending WhatsApp notifications must be:
+- Reliable
+- Asynchronous
+- Independent of the main application flow
+
+RabbitMQ is used as a **message broker** to decouple message production from message consumption.
+
+### Architecture Flow
+
+1. The application publishes a message to RabbitMQ
+2. RabbitMQ routes the message to a queue
+3. A Python worker consumes the message
+4. The worker simulates sending a WhatsApp notification
+
+### Prerequisites
+
+- GCP SUSE Arm64 virtual machine
+- RabbitMQ installed and running
+- RabbitMQ management plugin enabled
+- Python 3.8+
+- `pika` Python client library installed
+
+### Install Python Dependencies
+Install Python and the RabbitMQ Python client needed to build a consumer.
+
+```console
+sudo zypper install -y python3 python3-pip
+pip3 install pika
+```
+
+### RabbitMQ Topology
+This use case uses a direct exchange topology for exact-match routing.
+
+**Exchanges**
+- **notifications (direct):** Routes WhatsApp notification messages based on an exact routing key match.
+ +**Queue** +- **whatsapp.notifications (durable):** Stores WhatsApp messages persistently until they are consumed by a worker. + +**Binding** +- Exchange: **notifications** – Connects the exchange to the WhatsApp notification queue. +- Routing key: **whatsapp** – Ensures only WhatsApp-related messages are routed. +- Queue: **whatsapp.notifications**– Final destination where messages are delivered for processing. + +### Declare RabbitMQ Resources +Creates the required exchange, queue, and binding for WhatsApp notifications. + +- `Declare exchange`: Creates a durable direct exchange named notifications to route messages using exact routing keys. +- `Declare queue`: Creates a durable queue whatsapp.notifications to persist WhatsApp notification messages until consumed. +- `Declare binding`: Links the notifications exchange to the whatsapp.notifications queue using the whatsapp routing key. + +```console +./rabbitmqadmin declare exchange \ + name=notifications \ + type=direct \ + durable=true + +./rabbitmqadmin declare queue \ + name=whatsapp.notifications \ + durable=true + +./rabbitmqadmin declare binding \ + source=notifications \ + destination=whatsapp.notifications \ + routing_key=whatsapp +``` +Each command confirms successful creation with messages like **exchange declared, queue declared, and binding declared**. + +**Validate the setup:** + +Validates that RabbitMQ resources exist and are correctly connected. + +```console +./rabbitmqadmin list queues name messages +./rabbitmqadmin list exchanges name type +./rabbitmqadmin list bindings +``` + +- `list queues` displays all queues along with the number of messages currently stored in each queue. +- `list exchanges` lists all exchanges and their types, allowing verification of correct exchange configuration. +- `list bindings` shows how exchanges, queues, and routing keys are connected. + +**Output shows:** + +- notifications exchange of type direct +- whatsapp.notifications durable queue +- Correct routing key binding (whatsapp) +- Zero or more queued messages + +Confirms topology correctness before consuming messages. + +```output +> ./rabbitmqadmin list queues name messages ++------------------------+----------+ +| name | messages | ++------------------------+----------+ +| jobs | 0 | +| order.events | 1 | +| testqueue | 1 | +| whatsapp.notifications | 0 | ++------------------------+----------+ + +> ./rabbitmqadmin list exchanges name type ++--------------------+---------+ +| name | type | ++--------------------+---------+ +| | direct | +| amq.direct | direct | +| amq.fanout | fanout | +| amq.headers | headers | +| amq.match | headers | +| amq.rabbitmq.trace | topic | +| amq.topic | topic | +| events | topic | +| notifications | direct | ++--------------------+---------+ + +> ./rabbitmqadmin list bindings ++---------------+------------------------+------------------------+ +| source | destination | routing_key | ++---------------+------------------------+------------------------+ +| | jobs | jobs | +| | order.events | order.events | +| | testqueue | testqueue | +| | whatsapp.notifications | whatsapp.notifications | +| events | order.events | order.* | +| notifications | whatsapp.notifications | whatsapp | ++---------------+------------------------+------------------------+ +``` + +### WhatsApp Worker Implementation +The worker attaches as a **blocking consumer** to the `whatsapp.notifications` queue and processes incoming messages. 
+ +Create a `whatsapp_worker.py` file with the content below: + +This Python script implements a **RabbitMQ consumer (worker)** that processes WhatsApp notification messages from a queue in a reliable and controlled manner. + +```python +import pika +import json +import time + +RABBITMQ_HOST = "localhost" +RABBITMQ_VHOST = "/" +RABBITMQ_USER = "guest" +RABBITMQ_PASS = "guest" +QUEUE_NAME = "whatsapp.notifications" + +credentials = pika.PlainCredentials(RABBITMQ_USER, RABBITMQ_PASS) + +parameters = pika.ConnectionParameters( + host=RABBITMQ_HOST, + virtual_host=RABBITMQ_VHOST, + credentials=credentials, + heartbeat=60 +) + +print("[DEBUG] Connecting to RabbitMQ...") +connection = pika.BlockingConnection(parameters) +channel = connection.channel() + +print("[DEBUG] Declaring queue...") +channel.queue_declare(queue=QUEUE_NAME, durable=True) + +print("[DEBUG] Setting QoS...") +channel.basic_qos(prefetch_count=1) + +print("WhatsApp Worker started. Waiting for messages...") + +def send_whatsapp(ch, method, properties, body): + data = json.loads(body.decode()) + print(f"[Worker] Sending WhatsApp message to {data['phone']}") + print(f"[Worker] Message content: {data['message']}") + + # Simulate external WhatsApp API call + time.sleep(1) + + print("[Worker] Message sent successfully") + ch.basic_ack(delivery_tag=method.delivery_tag) + +channel.basic_consume( + queue=QUEUE_NAME, + on_message_callback=send_whatsapp, + auto_ack=False +) + +print("[DEBUG] Starting consumer loop (this should block)...") +channel.start_consuming() +``` + +### Start the Worker +Run the worker in a dedicated terminal session: + +```console +python3 whatsapp_worker.py +``` + +The worker is running correctly and waiting for messages without exiting. + +**output:** + +```output +[DEBUG] Connecting to RabbitMQ... +[DEBUG] Declaring queue... +[DEBUG] Setting QoS... +WhatsApp Worker started. Waiting for messages... +[DEBUG] Starting consumer loop (this should BLOCK)... +[Worker] Sending WhatsApp message to +911234567890 +[Worker] Message content: Your order #1234 has been confirmed +[Worker] Message sent successfully +[Worker] Sending WhatsApp message to +911234567890 +``` + +The process must block without returning to the shell prompt. + +### Publish a Test Message +From another terminal: Publishes a WhatsApp notification message to RabbitMQ. + +```console +./rabbitmqadmin publish \ + exchange=notifications \ + routing_key=whatsapp \ + payload='{"phone":"+911234567890","message":"Hello from RabbitMQ"}' +``` + +### Message Consumption Validation +The worker terminal displays logs similar to: + +```output +[DEBUG] Connecting to RabbitMQ... +[DEBUG] Declaring queue... +[DEBUG] Setting QoS... +WhatsApp Worker started. Waiting for messages... +[DEBUG] Starting consumer loop (this should BLOCK)... 
+[Worker] Sending WhatsApp message to +911234567890 +[Worker] Message content: Your order #1234 has been confirmed +[Worker] Message sent successfully +[Worker] Sending WhatsApp message to +911234567890 +[Worker] Message content: Your order #1234 has been confirmed +[Worker] Message sent successfully +[Worker] Sending WhatsApp message to +9111 +[Worker] Message content: Test-1 +[Worker] Message sent successfully +[Worker] Sending WhatsApp message to +911234567890 +[Worker] Message content: Validation test +[Worker] Message sent successfully +[Worker] Sending WhatsApp message to +911234567890 +[Worker] Message content: Hello from RabbitMQ +[Worker] Message sent successfully +[Worker] Sending WhatsApp message to +911234567890 +[Worker] Message content: Hello from RabbitMQ +[Worker] Message sent successfully +[Worker] Sending WhatsApp message to +911234567890 +[Worker] Message content: FINAL validation test +[Worker] Message sent successfully +[Worker] Sending WhatsApp message to +911234567890 +[Worker] Message content: FINAL validation test +[Worker] Message sent successfully +``` +**What this confirms:** + +- Message routing works correctly +- Queue consumption is successful +- Manual acknowledgments are applied + +End-to-end message flow validated. + +### Verify Queue State + +```console +./rabbitmqadmin list queues name messages consumers +``` + +Expected output: + +```output ++------------------------+----------+-----------+ +| name | messages | consumers | ++------------------------+----------+-----------+ +| jobs | 1 | 0 | +| order.events | 1 | 0 | +| testqueue | 1 | 0 | +| whatsapp.notifications | 0 | 1 | ++------------------------+----------+-----------+ +``` + +**This confirms that:** + +- Messages were consumed successfully +- One active consumer is connected +- No backlog remains in the queue diff --git a/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/_index.md b/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/_index.md index 5e875cfde1..5f462ccc96 100644 --- a/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/_index.md +++ b/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/_index.md @@ -1,22 +1,17 @@ --- title: Deploy TensorFlow on Google Cloud C4A (Arm-based Axion VMs) - -draft: true -cascade: - draft: true - minutes_to_complete: 30 -who_is_this_for: This is an introductory topic for software developers deploying and optimizing TensorFlow workloads on Arm64 Linux environments, specifically using Google Cloud C4A virtual machines powered by Axion processors. +who_is_this_for: This is an introductory topic for software developers deploying and optimizing TensorFlow workloads on Arm64 Linux environments, specifically using Google Cloud C4A virtual machines powered by Axion processors. learning_objectives: - - Provision an Arm-based SUSE SLES virtual machine on Google Cloud (C4A with Axion processors) + - Provision an Arm-based SUSE Linux Enterprise Server (SLES) virtual machine on Google Cloud (C4A with Axion processors) - Install TensorFlow on a SUSE Arm64 (C4A) instance - - Verify TensorFlow by running basic computation and model training tests on Arm64 - - Benchmark TensorFlow using tf.keras to evaluate inference speed and model performance on Arm64 systems. 
+ - Verify TensorFlow by running basic computation and model training tests on Arm64 + - Benchmark TensorFlow using TensorFlow Keras (`tf.keras`) to evaluate inference speed and model performance on Arm64 systems prerequisites: - - A [Google Cloud Platform (GCP)](https://cloud.google.com/free) account with billing enabled + - A [Google Cloud Platform (GCP)](https://cloud.google.com/free) account with billing enabled - Basic familiarity with [TensorFlow](https://www.tensorflow.org/) author: Pareena Verma @@ -45,16 +40,14 @@ further_reading: title: Google Cloud documentation link: https://cloud.google.com/docs type: documentation - - resource: title: TensorFlow documentation link: https://www.tensorflow.org/learn type: documentation - - resource: title: Phoronix Test Suite (PTS) documentation link: https://www.phoronix-test-suite.com/ - type: documentation + type: documentation weight: 1 layout: "learningpathall" diff --git a/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/background.md b/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/background.md index bb3cf5b347..84220d9fe4 100644 --- a/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/background.md +++ b/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/background.md @@ -1,24 +1,27 @@ --- -title: Getting started with TensorFlow on Google Axion C4A (Arm Neoverse-V2) +title: Get started with TensorFlow on Google Axion C4A weight: 2 layout: "learningpathall" --- -## Google Axion C4A Arm instances in Google Cloud +## Explore Google Axion C4A Arm instances in Google Cloud -Google Axion C4A is a family of Arm-based virtual machines built on Google’s custom Axion CPU, which is based on Arm Neoverse-V2 cores. Designed for high-performance and energy-efficient computing, these virtual machines offer strong performance for modern cloud workloads such as CI/CD pipelines, microservices, media processing, and general-purpose applications. +Google Axion C4A is a family of Arm-based virtual machines (VMs) that use Google’s custom Axion CPU, based on Arm Neoverse-V2 cores. Designed for high-performance and energy-efficient computing, these VMs offer strong performance for modern cloud workloads such as CI/CD pipelines, microservices, media processing, and general-purpose applications. -The C4A series provides a cost-effective alternative to x86 virtual machines while leveraging the scalability and performance benefits of the Arm architecture in Google Cloud. +The C4A series provides a cost-effective alternative to x86 VMs while using the scalability and performance benefits of the Arm architecture in Google Cloud. -To learn more about Google Axion, refer to the [Introducing Google Axion Processors, our new Arm-based CPUs](https://cloud.google.com/blog/products/compute/introducing-googles-new-arm-based-cpu) blog. +To learn more about Google Axion, see the [Introducing Google Axion Processors, our new Arm-based CPUs](https://cloud.google.com/blog/products/compute/introducing-googles-new-arm-based-cpu) blog. -## TensorFlow +## Explore TensorFlow -[TensorFlow](https://www.tensorflow.org/) is an **open-source machine learning and deep learning framework** developed by **Google**. It helps developers and researchers **build, train, and deploy AI models** efficiently across **CPUs, GPUs, and TPUs**. +[TensorFlow](https://www.tensorflow.org/) is an open-source machine learning and deep learning framework developed by Google. 
It helps developers and researchers build, train, and deploy AI models efficiently across CPUs, GPUs, and TPUs. -With support for **neural networks**, **natural language processing (NLP)**, and **computer vision**, TensorFlow is widely used for **AI research and production**. -Its **flexibility** and **scalability** make it ideal for both **cloud** and **edge environments**. +With support for neural networks, natural language processing (NLP), and computer vision, TensorFlow is widely used for AI research and production. Its flexibility and scalability make it ideal for both cloud and edge environments. To learn more, visit the [official TensorFlow website](https://www.tensorflow.org/). + +## Next steps + +Now that you understand Google Axion C4A Arm-based VMs and TensorFlow fundamentals, you can provision your own VM and start benchmarking machine learning workloads on Google Cloud. diff --git a/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/baseline.md b/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/baseline.md index ce9bae727e..f5a7d7bf7d 100644 --- a/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/baseline.md +++ b/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/baseline.md @@ -1,5 +1,5 @@ --- -title: Test TensorFlow baseline performance on Google Axion C4A Arm virtual machines +title: Test TensorFlow baseline performance on Google Axion C4A weight: 5 ### FIXED, DO NOT MODIFY @@ -8,11 +8,11 @@ layout: learningpathall ## Perform baseline testing -This section helps you verify that TensorFlow is properly installed and working on your Google Axion C4A VM. You'll run tests to confirm that your CPU can perform TensorFlow operations correctly. +This section helps you confirm that TensorFlow is installed and working correctly on your Google Axion C4A Arm virtual machine (VM). You'll run tests to check that your CPU can perform TensorFlow operations and basic neural network training. -### Check available devices +## Check available devices -This command shows which hardware devices TensorFlow can use, such as CPU or GPU. On most VMs, you'll see only CPU listed: +List the hardware devices TensorFlow can use, such as CPU or GPU. On most VMs, only the CPU is available: ```console python -c "import tensorflow as tf; print(tf.config.list_physical_devices())" @@ -24,30 +24,28 @@ The output is similar to: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')] ``` -### Run a computation test +## Run a computation test -This test multiplies two large matrices to verify that TensorFlow computations work correctly on your CPU and measures execution time: +Multiply two large matrices to verify TensorFlow computations on your CPU and measure execution time: ```console -python -c "import tensorflow as tf; import time; -a = tf.random.uniform((1000,1000)); b = tf.random.uniform((1000,1000)); -start = time.time(); c = tf.matmul(a,b); end = time.time(); +python -c "import tensorflow as tf; import time; \ +a = tf.random.uniform((1000,1000)); b = tf.random.uniform((1000,1000)); \ +start = time.time(); c = tf.matmul(a,b); end = time.time(); \ print('Computation time:', end - start, 'seconds')" ``` -This checks CPU performance for basic operations and provides a baseline measurement. - The output is similar to: ```output Computation time: 0.008263111114501953 seconds ``` -### Test neural network execution +This provides a baseline measurement for CPU performance. 
-Use a text editor to create a new file named `test_nn.py` for testing a simple neural network. +## Test neural network execution -Add the following code to create and train a basic neural network using random data: +Create a file named `test_nn.py` with the following code: ```python import keras @@ -61,7 +59,7 @@ y = np.random.rand(1000, 1) # Define the model model = keras.Sequential() model.add(keras.Input(shape=(20,))) -model.add(layers.Dense(64,activation="relu")) +model.add(layers.Dense(64, activation="relu")) model.add(layers.Dense(1)) # Compile the model @@ -71,9 +69,9 @@ model.compile(optimizer='adam', loss='mse') model.fit(x, y, epochs=1, batch_size=32) ``` -This script creates a simple neural network to verify that TensorFlow's deep learning functions work properly on the Arm platform. +This script creates and trains a basic neural network using random data to verify that TensorFlow's deep learning functions work on the Arm platform. -### Run the neural network test +## Run the neural network test Execute the script: @@ -87,4 +85,4 @@ TensorFlow displays training progress similar to: 32/32 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step - loss: 0.1024 ``` -This confirms that TensorFlow is working correctly on your Arm VM and can perform both basic computations and neural network training. +TensorFlow is working correctly on your Arm-based VM for both basic computations and neural network training. Your environment is ready for benchmarking. diff --git a/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/benchmarking.md b/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/benchmarking.md index 93a0a0e084..f65d13fa72 100644 --- a/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/benchmarking.md +++ b/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/benchmarking.md @@ -8,11 +8,11 @@ layout: learningpathall ## Benchmark TensorFlow models -This section benchmarks multiple TensorFlow models (ResNet50, MobileNetV2, and InceptionV3) using dummy input data. You'll measure average inference time and throughput for each model running on the CPU. +This section benchmarks multiple TensorFlow models - ResNet50, MobileNetV2, and InceptionV3 - using dummy input data. You'll measure average inference time and throughput for each model running on the CPU of your Arm-based Google Cloud Platform (GCP) VM. -tf.keras is TensorFlow's high-level API for building, training, and benchmarking deep learning models. It provides access to predefined architectures such as ResNet, MobileNet, and Inception, making it easy to evaluate model performance on different hardware setups. +TensorFlow Keras (`tf.keras`) is TensorFlow's high-level API for building, training, and benchmarking deep learning models. It provides access to predefined architectures such as ResNet, MobileNet, and Inception, making it easy to evaluate model performance on different hardware setups. -### Activate your virtual environment +## Activate your virtual environment Enable your isolated Python environment where TensorFlow is installed: @@ -23,7 +23,7 @@ python -c "import tensorflow as tf; print(tf.__version__)" This ensures that all TensorFlow-related packages run in a clean, controlled setup without affecting system-wide Python installations. 
-### Install required packages +## Install required packages Install TensorFlow and NumPy for model creation and benchmarking: @@ -31,9 +31,9 @@ Install TensorFlow and NumPy for model creation and benchmarking: pip install tensorflow==2.20.0 numpy ``` -These packages are likely already installed from the previous installation steps. NumPy supports efficient numerical operations, while TensorFlow handles deep learning workloads. +These packages are likely already installed from previous steps. NumPy supports efficient numerical operations, while TensorFlow handles deep learning workloads. -### Create the benchmark script +## Create the benchmark script Use an editor to create a Python script named `tf_cpu_benchmark.py` that will run TensorFlow model benchmarking tests. @@ -74,7 +74,7 @@ for name, constructor in models.items(): This script creates model instances without pretrained weights for fair CPU testing, generates random image data for inference, includes a warm-up phase to stabilize model performance, and measures inference time over 50 runs to calculate average performance and throughput. -### Run the benchmark +## Run the benchmark Execute the benchmarking script: @@ -98,13 +98,13 @@ InceptionV3 average inference time per batch: 0.8971 seconds InceptionV3 throughput: 35.67 images/sec ``` -### Understand the results +## Interpret the results The benchmark provides key performance metrics. Average inference time per batch measures how long it takes to process one batch of input data, with lower values indicating faster performance. Throughput shows how many images the model can process per second, with higher values indicating better efficiency. -### Performance summary +## Performance summary -The following table shows results from running the benchmark on a `c4a-standard-4` (4 vCPU, 16 GB memory) aarch64 VM in GCP using SUSE: +The following table shows results from running the benchmark on a `c4a-standard-4` (4 vCPU, 16 GB memory) aarch64 VM in Google Cloud Platform (GCP) using SUSE Linux Enterprise Server (SLES): | Model | Average Inference Time per Batch (seconds) | Throughput (images/sec) | |-------------|-------------------------------------------:|------------------------:| @@ -112,4 +112,6 @@ The following table shows results from running the benchmark on a `c4a-standard- | MobileNetV2 | 0.2909 | 110.02 | | InceptionV3 | 0.8971 | 35.67 | -The results demonstrate strong performance for lightweight CNNs like MobileNetV2, achieving over 110 images/sec on the aarch64 platform. Medium-depth models like InceptionV3 maintain balanced performance between accuracy and latency. Heavier architectures such as ResNet50 show longer inference times but deliver stable throughput, confirming that TensorFlow workloads run efficiently on Arm processors and provide a cost-effective alternative for AI inference tasks. +The results show strong performance for lightweight CNNs like MobileNetV2, achieving over 110 images/sec on the aarch64 platform. Medium-depth models like InceptionV3 maintain balanced performance between accuracy and latency. Heavier architectures such as ResNet50 show longer inference times but deliver stable throughput. + +You have successfully benchmarked TensorFlow models on your Arm-based VM. This demonstrates the efficiency and scalability of Arm platforms for deep learning workloads - great job! 
diff --git a/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/installation.md b/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/installation.md index 9d19b13fef..2ff77e8472 100644 --- a/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/installation.md +++ b/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/installation.md @@ -1,5 +1,5 @@ --- -title: Install TensorFlow +title: Install TensorFlow weight: 4 ### FIXED, DO NOT MODIFY @@ -8,11 +8,11 @@ layout: learningpathall ## Install TensorFlow on Google Axion C4A -TensorFlow is an open-source machine learning library developed by Google for building and deploying ML models efficiently. On aarch64 SUSE VMs, TensorFlow runs natively on CPU or GPU if available. +TensorFlow is an open-source machine learning (ML) library developed by Google for building and deploying ML models efficiently. On 64-bit Arm architecture (aarch64) SUSE Linux Enterprise Server (SLES) VMs, TensorFlow runs natively on the CPU and can use the GPU if available. -### Update your system +## Update your system -Update the system and install Python 3.11 with pip and virtual environment support: +Update your system and install Python 3.11, pip, and virtual environment support: ```console sudo zypper refresh @@ -21,13 +21,13 @@ sudo zypper install python311 python311-pip python311-venv Enter "y" when prompted to confirm the installation. This ensures your system has the essential tools required for TensorFlow setup. -### Verify Python installation +## Verify Python installation Confirm that Python and pip are correctly installed: ```console python3.11 --version -pip3 --version +pip3.11 --version ``` The output is similar to: @@ -37,7 +37,7 @@ Python 3.11.10 pip 22.3.1 from /usr/lib/python3.11/site-packages/pip (python 3.11) ``` -### Create a virtual environment +## Create a virtual environment Set up an isolated Python environment to keep TensorFlow dependencies separate from system packages: @@ -46,9 +46,9 @@ python3.11 -m venv tf-venv source tf-venv/bin/activate ``` -This creates and activates a virtual environment named `tf-venv` that prevents package conflicts. +Your virtual environment `tf-venv` is now active and isolated from system packages. -### Upgrade pip +## Upgrade pip Upgrade pip to the latest version for reliable package installation: @@ -56,7 +56,8 @@ Upgrade pip to the latest version for reliable package installation: pip3 install --upgrade pip ``` -### Install TensorFlow +## Install TensorFlow + Install the latest stable TensorFlow version for Arm64: ```console @@ -69,7 +70,7 @@ TensorFlow 2.18.0 introduced compatibility with NumPy 2.0, incorporating its upd The [Arm Ecosystem Dashboard](https://developer.arm.com/ecosystem-dashboard/) recommends TensorFlow version 2.18.0 as the minimum recommended version on Arm platforms. {{% /notice %}} -### Verify the installation +## Verify the installation Check that TensorFlow installed successfully and display the version: @@ -83,4 +84,4 @@ The output is similar to: 2.20.0 ``` -Your TensorFlow installation is now complete and ready for use. +You have now installed TensorFlow on your Arm-based VM and are ready to start building and running machine learning models! 
diff --git a/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/instance.md b/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/instance.md index ea1d56e7f1..a7b100ce12 100644 --- a/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/instance.md +++ b/content/learning-paths/servers-and-cloud-computing/tensorflow-gcp/instance.md @@ -1,43 +1,42 @@ --- -title: Create a Google Axion C4A Arm virtual machine on GCP +title: Create a Google Axion C4A Arm virtual machine on GCP weight: 3 ### FIXED, DO NOT MODIFY layout: learningpathall --- -## Overview +## Provision a Google Axion C4A Arm VM -In this section, you will learn how to provision a Google Axion C4A Arm virtual machine on Google Cloud Platform (GCP) using the `c4a-standard-4` (4 vCPUs, 16 GB memory) machine type in the Google Cloud Console. +In this section, you'll provision a Google Axion C4A Arm virtual machine (VM) on Google Cloud Platform (GCP) using the `c4a-standard-4` (4 vCPUs, 16 GB memory) machine type. {{% notice Note %}} -For support on GCP setup, see the Learning Path [Getting started with Google Cloud Platform](https://learn.arm.com/learning-paths/servers-and-cloud-computing/csp/google/). +For help with GCP setup, see the Learning Path [Getting started with Google Cloud Platform](/learning-paths/servers-and-cloud-computing/csp/google/). {{% /notice %}} -## Provision a Google Axion C4A Arm VM in Google Cloud Console +## Create your VM -To create a virtual machine based on the C4A instance type: -- Navigate to the [Google Cloud Console](https://console.cloud.google.com/). -- Go to **Compute Engine > VM Instances** and select **Create Instance**. +- Go to the [Google Cloud Console](https://console.cloud.google.com/). +- Select **Compute Engine** > **VM Instances** and click **Create Instance**. - Under **Machine configuration**: - - Populate fields such as **Instance name**, **Region**, and **Zone**. - - Set **Series** to `C4A`. - - Select `c4a-standard-4` for machine type. + - Fill in **Instance name**, **Region**, and **Zone**. + - Set **Series** to `C4A`. + - Choose `c4a-standard-4` for machine type. - ![Create a Google Axion C4A Arm virtual machine in the Google Cloud Console with c4a-standard-4 selected alt-text#center](images/gcp-vm.png "Creating a Google Axion C4A Arm virtual machine in Google Cloud Console") + ![Screenshot of the Google Cloud Console showing the creation of a new VM instance. The c4a-standard-4 machine type is selected under Machine configuration. The interface displays fields for instance name, region, zone, and machine type selection. alt-text #center](images/gcp-vm.png "Creating a Google Axion C4A Arm virtual machine in Google Cloud Console") - -- Under **OS and Storage**, select **Change**, then choose an Arm64-based OS image. For this Learning Path, use **SUSE Linux Enterprise Server**. -- If using use **SUSE Linux Enterprise Server**. Select "Pay As You Go" for the license type. -- Once appropriately selected, please Click **Select**. +- Under **OS and Storage**, click **Change** and select an Arm64-based OS image. For this Learning Path, choose **SUSE Linux Enterprise Server (SLES)**. + - Select "Pay As You Go" for the license type. + - Click **Select** to confirm your OS choice. - Under **Networking**, enable **Allow HTTP traffic**. -- Click **Create** to launch the instance. -- Once created, you should see a "SSH" option to the right in your list of VM instances. 
Click on this to launch a SSH shell into your VM instance: +- Click **Create** to launch your VM instance. +- Once created, find your VM in the list and click **SSH** to open an SSH session in your browser. + +![Screenshot showing the SSH option next to a running VM instance in the Google Cloud Console. The SSH button is highlighted, indicating how to launch a secure shell session into the VM. alt-text #center](images/gcp-ssh.png "Invoke an SSH session into your running VM instance") -![Invoke a SSH session via your browser alt-text#center](images/gcp-ssh.png "Invoke a SSH session into your running VM instance") +- A new browser window opens with a terminal shell connected to your VM. -- A window from your browser should come up and you should now see a shell into your VM instance: +![Screenshot of a terminal shell in the browser, connected to the running VM instance. The shell displays a command prompt, ready for user input. alt-text #center](images/gcp-shell.png "Terminal shell in your VM instance") -![Terminal Shell in your VM instance alt-text#center](images/gcp-shell.png "Terminal shell in your VM instance") -Next, let's install tensorflow! \ No newline at end of file +You have successfully provisioned an Arm-based VM on Google Cloud. Next, you'll install TensorFlow on your new instance! \ No newline at end of file diff --git a/data/stats_current_test_info.yml b/data/stats_current_test_info.yml index eec1fe44b8..77dc3b4868 100644 --- a/data/stats_current_test_info.yml +++ b/data/stats_current_test_info.yml @@ -94,6 +94,10 @@ sw_categories: gfortran: readable_title: GFortran tests_and_status: [] + github-copilot: + readable_title: GitHub Copilot + tests_and_status: + - ubuntu:latest: passed go: readable_title: Go tests_and_status: [] diff --git a/tools/resize_images.sh b/tools/resize_images.sh index 1450d3e794..b5b73375db 100755 --- a/tools/resize_images.sh +++ b/tools/resize_images.sh @@ -5,7 +5,7 @@ set -e # # Tunables: Defaults # -export dry_run=true # if true, do not perform any file modifications +export dry_run=false # if true, do not perform any file modifications export quality=85 # quality for webp conversion (1-100) export max_width=1600 # maximum width before resizing export target_width=1280 # target width for resizing @@ -78,8 +78,12 @@ process_images() { echo "Checking $img..." width=$(identify -format "%w" "$img" 2>/dev/null || echo 0) - # Get file size in KB (macOS stat is different) - filesize=$(stat -c%s "$img" 2>/dev/null || echo 0) + # Get file size in KB (handle both macOS and Linux) + if [[ "$OSTYPE" == "darwin"* ]]; then + filesize=$(stat -f%z "$img" 2>/dev/null || echo 0) + else + filesize=$(stat -c%s "$img" 2>/dev/null || echo 0) + fi kbsize=$((filesize / 1024)) # Define new filename @@ -107,8 +111,17 @@ process_images() { fi echo "Optimizing $img (${kbsize}KB, ${width}px), quality=$quality" # Resize and convert to WebP. If error occurs, capture it and exit. - error=$(magick "$img" -resize ${target_width}x\> -quality $quality -define webp:lossless=true "$webp_img" 2>&1) - convert_status=$? + # Try 'magick' first (newer ImageMagick), fall back to 'convert' (older/traditional) + if command -v magick &> /dev/null; then + error=$(magick "$img" -resize ${target_width}x\> -quality $quality -define webp:lossless=true "$webp_img" 2>&1) + convert_status=$? + elif command -v convert &> /dev/null; then + error=$(convert "$img" -resize ${target_width}x\> -quality $quality -define webp:lossless=true "$webp_img" 2>&1) + convert_status=$? 
+ else + echo "⚠️ Neither 'magick' nor 'convert' command found. Please install ImageMagick." + exit 1 + fi if [ $convert_status -ne 0 ]; then echo "⚠️ Error converting $img to WebP format." if [ -z "${error}" ]; then @@ -130,8 +143,14 @@ process_images() { find "$img_dir" "$(dirname "$img_dir")" -name "*.md" 2>/dev/null | while read -r md_file; do if grep -q "$img_name" "$md_file"; then echo "Replacing $img_name → $webp_name in $md_file" - sed -i '' "s|($img_name|(${webp_name}|g" "$md_file" - sed -i '' "s|/$img_name|/${webp_name}|g" "$md_file" + # Handle sed differences between macOS and Linux + if [[ "$OSTYPE" == "darwin"* ]]; then + sed -i '' "s|($img_name|(${webp_name}|g" "$md_file" + sed -i '' "s|/$img_name|/${webp_name}|g" "$md_file" + else + sed -i "s|($img_name|(${webp_name}|g" "$md_file" + sed -i "s|/$img_name|/${webp_name}|g" "$md_file" + fi fi done fi