feat(parser): Add semantic parser and pipeline implementation

dugshub · dugshub · commit 81be49a4f768 · 2025-09-30T11:52:57.000-04:00
- SemanticTextParser using semantic types throughout
- SemanticParserPipeline for composing semantic parsers
- Protocol definition for semantic parsers
- Integration with command registry for validation
diff --git a/src/cli_patterns/ui/parser/semantic_parser.py b/src/cli_patterns/ui/parser/semantic_parser.py
@@ -0,0 +1,189 @@
+"""Semantic text parser using semantic types for enhanced type safety.
+
+This module provides SemanticTextParser, which is like TextParser but works
+with semantic types and provides semantic-aware parsing capabilities.
+"""
+
+from __future__ import annotations
+
+import shlex
+from typing import Optional
+
+from cli_patterns.core.parser_types import (
+    CommandId,
+    make_argument_value,
+    make_command_id,
+    make_flag_name,
+    make_option_key,
+)
+from cli_patterns.ui.parser.semantic_context import SemanticContext
+from cli_patterns.ui.parser.semantic_errors import SemanticParseError
+from cli_patterns.ui.parser.semantic_registry import SemanticCommandRegistry
+from cli_patterns.ui.parser.semantic_result import SemanticParseResult
+
+
+class SemanticTextParser:
+    """Parser for standard text-based commands with semantic type support.
+
+    Handles parsing of commands with arguments, short/long flags, and key-value options,
+    returning semantic types for enhanced type safety and better intellisense support.
+    """
+
+    def __init__(self) -> None:
+        """Initialize semantic text parser."""
+        self._registry: Optional[SemanticCommandRegistry] = None
+
+    def set_registry(self, registry: SemanticCommandRegistry) -> None:
+        """Set the command registry for validation and suggestions.
+
+        Args:
+            registry: Semantic command registry to use
+        """
+        self._registry = registry
+
+    def can_parse(self, input_str: str, context: SemanticContext) -> bool:
+        """Check if input can be parsed by this semantic text parser.
+
+        Args:
+            input_str: Input string to check
+            context: Semantic parsing context
+
+        Returns:
+            True if input is non-empty text that doesn't start with shell prefix
+        """
+        if not input_str or not input_str.strip():
+            return False
+
+        # Don't handle shell commands (those start with !)
+        if input_str.lstrip().startswith("!"):
+            return False
+
+        return True
+
+    def parse(self, input_str: str, context: SemanticContext) -> SemanticParseResult:
+        """Parse text input into structured semantic command result.
+
+        Args:
+            input_str: Input string to parse
+            context: Semantic parsing context
+
+        Returns:
+            SemanticParseResult with parsed command, args, flags, and options
+
+        Raises:
+            SemanticParseError: If parsing fails or command is unknown
+        """
+        if not self.can_parse(input_str, context):
+            if not input_str.strip():
+                raise SemanticParseError(
+                    error_type="EMPTY_INPUT",
+                    message="Empty input cannot be parsed",
+                    suggestions=[make_command_id("help")],
+                )
+            else:
+                raise SemanticParseError(
+                    error_type="INVALID_INPUT",
+                    message="Input cannot be parsed by text parser",
+                    suggestions=[make_command_id("help")],
+                )
+
+        try:
+            # Use shlex for proper quote handling
+            tokens = shlex.split(input_str.strip())
+        except ValueError as e:
+            # Handle shlex errors (e.g., unmatched quotes)
+            error_msg = str(e).replace("quotation", "quote")
+            raise SemanticParseError(
+                error_type="QUOTE_MISMATCH",
+                message=f"Syntax error in command: {error_msg}",
+                suggestions=[make_command_id("help")],
+            ) from e
+
+        if not tokens:
+            raise SemanticParseError(
+                error_type="EMPTY_INPUT",
+                message="No command found after parsing",
+                suggestions=[make_command_id("help")],
+            )
+
+        # First token is the command
+        command_str = tokens[0]
+        command = make_command_id(command_str)
+
+        # Check if command is registered (if we have a registry)
+        if self._registry and not self._registry.is_registered(command):
+            suggestions = self._registry.get_suggestions(command_str, max_suggestions=3)
+            if not suggestions:
+                suggestions = [make_command_id("help")]
+
+            raise SemanticParseError(
+                error_type="UNKNOWN_COMMAND",
+                message=f"Unknown command: {command_str}",
+                command=command,
+                suggestions=suggestions,
+            )
+
+        # Parse remaining tokens into args, flags, and options
+        args = []
+        flags = set()
+        options = {}
+
+        i = 1
+        while i < len(tokens):
+            token = tokens[i]
+
+            if token.startswith("--"):
+                # Long option handling
+                if "=" in token:
+                    # Format: --key=value
+                    key_value = token[2:]  # Remove --
+                    if "=" in key_value:
+                        key, value = key_value.split("=", 1)
+                        options[make_option_key(key)] = make_argument_value(value)
+                else:
+                    # Format: --key value (next token is value)
+                    key = token[2:]  # Remove --
+                    if i + 1 < len(tokens) and not tokens[i + 1].startswith("-"):
+                        options[make_option_key(key)] = make_argument_value(
+                            tokens[i + 1]
+                        )
+                        i += 1  # Skip the value token
+                    else:
+                        # Treat as flag if no value follows
+                        flags.add(make_flag_name(key))
+
+            elif token.startswith("-") and len(token) > 1:
+                # Short flag(s) handling
+                flag_chars = token[1:]  # Remove -
+                for char in flag_chars:
+                    flags.add(make_flag_name(char))
+
+            else:
+                # Regular argument
+                args.append(make_argument_value(token))
+
+            i += 1
+
+        return SemanticParseResult(
+            command=command,
+            args=args,
+            flags=flags,
+            options=options,
+            raw_input=input_str,
+        )
+
+    def get_suggestions(self, partial: str) -> list[CommandId]:
+        """Get completion suggestions for partial input.
+
+        Args:
+            partial: Partial input to complete
+
+        Returns:
+            List of semantic command suggestions
+        """
+        if not self._registry:
+            # Return some default suggestions if no registry
+            defaults = ["help", "status", "version"]
+            return [make_command_id(cmd) for cmd in defaults if cmd.startswith(partial)]
+
+        return self._registry.get_suggestions(partial)
diff --git a/src/cli_patterns/ui/parser/semantic_pipeline.py b/src/cli_patterns/ui/parser/semantic_pipeline.py
@@ -0,0 +1,209 @@
+"""Semantic parser pipeline for routing input to semantic parsers.
+
+This module provides SemanticParserPipeline, which routes input to semantic parsers
+that work with semantic types and contexts for enhanced type safety.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Callable, Optional, Protocol, runtime_checkable
+
+from cli_patterns.core.parser_types import CommandId
+from cli_patterns.ui.parser.semantic_context import SemanticContext
+from cli_patterns.ui.parser.semantic_errors import SemanticParseError
+from cli_patterns.ui.parser.semantic_result import SemanticParseResult
+
+
+@runtime_checkable
+class SemanticParser(Protocol):
+    """Structural interface that every semantic command parser must satisfy.
+
+    Because the protocol is runtime_checkable, isinstance() checks against it
+    only verify that these methods exist, not that their signatures match.
+    """
+
+    def can_parse(self, input_str: str, context: SemanticContext) -> bool:
+        """Determine if this parser can handle the given input.
+
+        Args:
+            input_str: Raw input string to evaluate
+            context: Current semantic parsing context
+
+        Returns:
+            True if this parser can handle the input, False otherwise
+        """
+        ...
+
+    def parse(self, input_str: str, context: SemanticContext) -> SemanticParseResult:
+        """Parse the input string into a structured SemanticParseResult.
+
+        Args:
+            input_str: Raw input string to parse
+            context: Current semantic parsing context
+
+        Returns:
+            SemanticParseResult containing parsed command, args, flags, and options
+
+        Raises:
+            SemanticParseError: If parsing fails or input is invalid
+        """
+        ...
+
+    def get_suggestions(self, partial: str) -> list[CommandId]:
+        """Get completion suggestions for partial input.
+
+        Args:
+            partial: Partial input string to complete
+
+        Returns:
+            List of suggested semantic command completions
+        """
+        ...
+
+
+@dataclass
+class _SemanticParserEntry:
+    """Pipeline record pairing a parser with its routing condition and priority."""
+
+    parser: SemanticParser  # parser that receives the input when selected
+    condition: Optional[Callable[[str, SemanticContext], bool]]  # None = no gate
+    priority: int  # larger values are tried first by SemanticParserPipeline
+
+
+class SemanticParserPipeline:
+    """Pipeline for routing input to appropriate semantic parsers.
+
+    The pipeline maintains a list of semantic parsers with optional conditions and priorities.
+    When parsing input, it tries each parser in order until one succeeds, maintaining
+    semantic type safety throughout the process.
+    """
+
+    def __init__(self) -> None:
+        """Initialize empty semantic parser pipeline."""
+        self._parsers: list[_SemanticParserEntry] = []
+
+    def add_parser(
+        self,
+        parser: SemanticParser,
+        condition: Optional[Callable[[str, SemanticContext], bool]] = None,
+        priority: int = 0,
+    ) -> None:
+        """Add a semantic parser to the pipeline.
+
+        Args:
+            parser: Semantic parser instance to add
+            condition: Optional condition function that returns True if parser should handle input
+            priority: Priority for ordering (higher numbers = higher priority, default 0)
+        """
+        entry = _SemanticParserEntry(
+            parser=parser, condition=condition, priority=priority
+        )
+        self._parsers.append(entry)
+
+        # Sort by priority (higher numbers first), maintaining insertion order for same priority
+        self._parsers.sort(
+            key=lambda x: (
+                -x.priority,
+                (
+                    self._parsers.index(x)
+                    if x in self._parsers[:-1]
+                    else len(self._parsers)
+                ),
+            )
+        )
+
+    def remove_parser(self, parser: SemanticParser) -> bool:
+        """Remove a semantic parser from the pipeline.
+
+        Args:
+            parser: Semantic parser instance to remove
+
+        Returns:
+            True if parser was found and removed, False otherwise
+        """
+        for i, entry in enumerate(self._parsers):
+            if entry.parser is parser:
+                self._parsers.pop(i)
+                return True
+        return False
+
+    def parse(self, input_str: str, context: SemanticContext) -> SemanticParseResult:
+        """Parse input using the first matching semantic parser in the pipeline.
+
+        Args:
+            input_str: Input string to parse
+            context: Semantic parsing context
+
+        Returns:
+            SemanticParseResult from the first parser that can handle the input
+
+        Raises:
+            SemanticParseError: If no parser can handle the input or parsing fails
+        """
+        if not self._parsers:
+            raise SemanticParseError(
+                error_type="NO_PARSERS",
+                message="No parsers available in pipeline",
+                suggestions=[],
+            )
+
+        matching_parsers = []
+        condition_errors = []
+
+        # Find all parsers that can handle the input
+        for entry in self._parsers:
+            try:
+                # Check condition if provided
+                if entry.condition is not None:
+                    if not entry.condition(input_str, context):
+                        continue
+
+                # Check if parser can handle the input
+                if hasattr(entry.parser, "can_parse"):
+                    if entry.parser.can_parse(input_str, context):
+                        matching_parsers.append(entry)
+                else:
+                    # If no can_parse method, assume it can handle it
+                    matching_parsers.append(entry)
+
+            except Exception as e:
+                # Condition function failed, skip this parser
+                condition_errors.append(f"Condition failed for parser: {e}")
+                continue
+
+        if not matching_parsers:
+            error_msg = "No parser can handle the input"
+            if condition_errors:
+                error_msg += f". Condition errors: {'; '.join(condition_errors)}"
+
+            raise SemanticParseError(
+                error_type="NO_MATCHING_PARSER",
+                message=error_msg,
+                suggestions=[],
+            )
+
+        # Try the first matching parser (highest priority)
+        parser_entry = matching_parsers[0]
+
+        try:
+            return parser_entry.parser.parse(input_str, context)
+        except SemanticParseError:
+            # Re-raise semantic parse errors from the parser
+            raise
+        except Exception as e:
+            # Convert other exceptions to SemanticParseError
+            raise SemanticParseError(
+                error_type="PARSER_ERROR",
+                message=f"Parser failed: {str(e)}",
+                suggestions=[],
+            ) from e
+
+    def clear(self) -> None:
+        """Clear all parsers from the pipeline."""
+        self._parsers.clear()
+
+    @property
+    def parser_count(self) -> int:
+        """Get the number of parsers in the pipeline."""
+        return len(self._parsers)