diff --git a/README.md b/README.md index 618981b..5740ad4 100644 --- a/README.md +++ b/README.md @@ -9,16 +9,66 @@ pip install -e . # Install Playwright browsers (required) playwright install chromium + +# For LLM Agent features (optional) +pip install openai # For OpenAI models +pip install anthropic # For Claude models +pip install transformers torch # For local LLMs +``` + +## Quick Start: Choose Your Abstraction Level + +Sentience SDK offers **three abstraction levels** - use what fits your needs: + +### 🎯 **Level 3: Natural Language (Easiest)** - For non-technical users + +```python +from sentience import SentienceBrowser, ConversationalAgent +from sentience.llm_provider import OpenAIProvider + +browser = SentienceBrowser() +llm = OpenAIProvider(api_key="your-key", model="gpt-4o") +agent = ConversationalAgent(browser, llm) + +with browser: + response = agent.execute("Search for magic mouse on google.com") + print(response) + # → "I searched for 'magic mouse' and found several results. + # The top result is from amazon.com selling Magic Mouse 2 for $79." 
``` -## Quick Start +**Best for:** End users, chatbots, no-code platforms +**Code required:** 3-5 lines +**Technical knowledge:** None + +### ⚙️ **Level 2: Technical Commands (Recommended)** - For AI developers + +```python +from sentience import SentienceBrowser, SentienceAgent +from sentience.llm_provider import OpenAIProvider + +browser = SentienceBrowser() +llm = OpenAIProvider(api_key="your-key", model="gpt-4o") +agent = SentienceAgent(browser, llm) + +with browser: + browser.page.goto("https://google.com") + agent.act("Click the search box") + agent.act("Type 'magic mouse' into the search field") + agent.act("Press Enter key") +``` + +**Best for:** Building AI agents, automation scripts +**Code required:** 10-15 lines +**Technical knowledge:** Medium (Python basics) + +### 🔧 **Level 1: Direct SDK (Most Control)** - For production automation ```python from sentience import SentienceBrowser, snapshot, find, click -# Start browser with extension with SentienceBrowser(headless=False) as browser: - browser.goto("https://example.com", wait_until="domcontentloaded") + browser.page.goto("https://example.com") # Take snapshot - captures all interactive elements snap = snapshot(browser) @@ -31,6 +81,10 @@ with SentienceBrowser(headless=False) as browser: print(f"Click success: {result.success}") ``` +**Best for:** Maximum control, performance-critical apps +**Code required:** 20-50 lines +**Technical knowledge:** High (SDK API, selectors) + ## Real-World Example: Amazon Shopping Bot This example demonstrates navigating Amazon, finding products, and adding items to cart: diff --git a/examples/agent_layers_demo.py b/examples/agent_layers_demo.py new file mode 100644 index 0000000..c5432e7 --- /dev/null +++ b/examples/agent_layers_demo.py @@ -0,0 +1,222 @@ +""" +Demonstration of all three abstraction layers in Sentience SDK + +Layer 1: Direct SDK (Full Control) +Layer 2: SentienceAgent (Technical Commands) +Layer 3: ConversationalAgent (Natural Language) + +This 
def demo_layer1_direct_sdk():
    """
    Layer 1: Direct SDK Usage
    - Full control over every action
    - Requires knowing exact element selectors
    - 50+ lines of code for typical automation

    Opens a non-headless browser, navigates to google.com, takes a
    snapshot, looks the search box up by role, then clicks it, types
    "magic mouse" and presses Enter. If neither role query matches, the
    demo prints an error and returns early instead of failing on a
    missing element.

    Takes no arguments and returns None.
    """
    print("\n" + "="*70)
    print("LAYER 1: Direct SDK Usage (Full Control)")
    print("="*70)

    from sentience import SentienceBrowser, snapshot, find, click, type_text, press

    with SentienceBrowser(headless=False) as browser:
        # Navigate
        browser.page.goto("https://google.com")

        # Get snapshot
        snap = snapshot(browser)

        # Find search box manually: try the specific role first, then the
        # generic one.
        search_box = find(snap, "role=searchbox") or find(snap, "role=textbox")
        if not search_box:
            # find() presumably yields a falsy value when nothing matches
            # (the original fallback relied on that too); without this
            # guard the `.id` access below would raise.
            print("\nError: no search box found (tried role=searchbox and role=textbox)")
            return

        # Click search box
        click(browser, search_box.id)

        # Type query
        type_text(browser, search_box.id, "magic mouse")

        # Press Enter
        press(browser, "Enter")

    print("\n✅ Layer 1 Demo Complete")
    print(" Code required: ~20 lines")
    print(" Technical knowledge: High")
    print(" Flexibility: Maximum")
def demo_layer3_conversational_agent():
    """
    Layer 3: ConversationalAgent (Natural Language)
    - Pure natural language interface
    - Automatic planning and execution
    - 3 lines of code for typical automation

    Builds a non-headless SentienceBrowser, an OpenAIProvider (model
    "gpt-4o", API key read from the OPENAI_API_KEY environment variable)
    and a verbose ConversationalAgent, then sends one natural-language
    request and prints the agent's response with a short summary.

    Takes no arguments and returns None.
    """
    print("\n" + "="*70)
    print("LAYER 3: ConversationalAgent (Natural Language)")
    print("="*70)

    from sentience import SentienceBrowser, ConversationalAgent
    from sentience.llm_provider import OpenAIProvider

    # Initialize
    browser = SentienceBrowser(headless=False)
    llm = OpenAIProvider(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
    agent = ConversationalAgent(browser, llm, verbose=True)

    with browser:
        # Execute in natural language (agent plans and executes automatically)
        response = agent.execute("Search for magic mouse on google.com")

    # The check mark was mis-encoded in the original message; restored here.
    print("\n✅ Layer 3 Demo Complete")
    print(" Code required: ~5 lines")
    print(" Technical knowledge: None")
    print(" Flexibility: Medium")
    print(f" Agent Response: {response}")
def demo_comparison():
    """
    Print a side-by-side comparison of all three layers.

    Writes a banner followed by a fixed, Markdown-style feature table to
    stdout. Takes no arguments and returns None.
    """
    rule = "=" * 70
    # One call emits the blank line, the rule, the title and the closing rule.
    print("\n" + rule, "COMPARISON: All Three Layers", rule, sep="\n")

    # Static reference table; the figures are hard-coded, not measured here.
    layers_overview = """
    | Feature | Layer 1 (SDK) | Layer 2 (Agent) | Layer 3 (Conversational) |
    |--------------------------|------------------|------------------|--------------------------|
    | Lines of code | 50+ | 15 | 3-5 |
    | Technical knowledge | High | Medium | None |
    | Requires selectors? | Yes | No | No |
    | LLM required? | No | Yes | Yes |
    | Cost per action | $0 | ~$0.005 | ~$0.010 |
    | Speed | Fastest | Fast | Medium |
    | Error handling | Manual | Auto-retry | Auto-recovery |
    | Multi-step planning | Manual | Manual | Automatic |
    | Natural language I/O | No | Commands only | Full conversation |
    | Best for | Production | AI developers | End users |
    """

    print(layers_overview)
Layer 3: ConversationalAgent (natural language)") + print("\nChoose which demo to run:") + print(" 1 - Layer 1: Direct SDK") + print(" 2 - Layer 2: SentienceAgent") + print(" 3 - Layer 3: ConversationalAgent (OpenAI)") + print(" 4 - Layer 3: ConversationalAgent (Local LLM)") + print(" 5 - Show comparison table") + print(" 0 - Exit") + + choice = input("\nEnter your choice (0-5): ").strip() + + if choice == "1": + demo_layer1_direct_sdk() + elif choice == "2": + if not os.getenv("OPENAI_API_KEY"): + print("\nāŒ Error: OPENAI_API_KEY not set") + return + demo_layer2_sentience_agent() + elif choice == "3": + if not os.getenv("OPENAI_API_KEY"): + print("\nāŒ Error: OPENAI_API_KEY not set") + return + demo_layer3_conversational_agent() + elif choice == "4": + demo_layer3_with_local_llm() + elif choice == "5": + demo_comparison() + elif choice == "0": + print("Goodbye!") + else: + print("Invalid choice") + + +if __name__ == "__main__": + main() diff --git a/examples/test_local_llm_agent.py b/examples/test_local_llm_agent.py new file mode 100644 index 0000000..f2a5c1b --- /dev/null +++ b/examples/test_local_llm_agent.py @@ -0,0 +1,86 @@ +""" +Test script for LocalLLMProvider with Qwen2.5-3B-Instruct +Demonstrates using a local LLM with SentienceAgent +""" + +from sentience.llm_provider import LocalLLMProvider + +def test_local_llm_basic(): + """Test basic LLM response generation""" + print("="*70) + print("Testing LocalLLMProvider with Qwen2.5-3B-Instruct") + print("="*70) + + # Initialize local LLM + # Using the model from your local cache + llm = LocalLLMProvider( + model_name="Qwen/Qwen2.5-3B-Instruct", + device="auto", # Will use CUDA if available, else CPU + load_in_4bit=False, # Set to True to save memory + torch_dtype="auto" + ) + + print("\n" + "="*70) + print("Test 1: Simple question") + print("="*70) + + response = llm.generate( + system_prompt="You are a helpful web automation assistant.", + user_prompt="What is 2+2?", + max_new_tokens=50, + temperature=0.1 + 
) + + print(f"Response: {response.content}") + print(f"Tokens: {response.total_tokens} (prompt: {response.prompt_tokens}, completion: {response.completion_tokens})") + + print("\n" + "="*70) + print("Test 2: Action parsing (for agent)") + print("="*70) + + system_prompt = """You are an AI web automation agent. + +GOAL: Click the search box + +VISIBLE ELEMENTS (sorted by importance, max 50): +[1]