refactor: generate book python script

partrita · partrita · commit db007afb0fa4 · 2026-02-15T15:40:36.000+09:00
diff --git a/OTHERS/Jarvis/ools/PyQt5/bindings/QtWidgets/qmdisubwindow.sip b/OTHERS/Jarvis/ools/PyQt5/bindings/QtWidgets/qmdisubwindow.sip
@@ -117,3 +117,7 @@ protected:
 };
 
 QFlags<QMdiSubWindow::SubWindowOption> operator|(QMdiSubWindow::SubWindowOption f1, QFlags<QMdiSubWindow::SubWindowOption> f2);
+QFlags<QMdiSubWindow::SubWindowOption> operator|(QMdiSubWindow::SubWindowOption f1, QMdiSubWindow::SubWindowOption f2);
+QFlags<QMdiSubWindow::SubWindowOption> operator&(QMdiSubWindow::SubWindowOption f1, QFlags<QMdiSubWindow::SubWindowOption> f2);
+QFlags<QMdiSubWindow::SubWindowOption> operator^(QMdiSubWindow::SubWindowOption f1, QFlags<QMdiSubWindow::SubWindowOption> f2);
+QFlags<QMdiSubWindow::SubWindowOption> operator~(QMdiSubWindow::SubWindowOption f1);
diff --git a/generate_book.py b/generate_book.py
@@ -10,7 +10,7 @@
 EXCLUDE_DIRS = {
     ".git", ".github", ".pixi", "mybook", "__pycache__", ".ipynb_checkpoints", 
     ".vscode", ".idea", "venv", "env", "node_modules", "site-packages",
-    "OTHERS", "gemini chatbot", "generate_book.py", "LICENSE", "README.md", "CONTRIBUTING.md", "CODE_OF_CONDUCT.md", "pixi.lock", "pixi.toml", "file.txt"
+    "gemini chatbot", "generate_book.py", "LICENSE", "README.md", "CONTRIBUTING.md", "CODE_OF_CONDUCT.md", "pixi.lock", "pixi.toml", "file.txt"
 }
 
 def get_categories():
@@ -24,20 +24,6 @@ def get_categories():
             categories.append(item)
     return sorted(categories, key=lambda x: x.name)
 
-def get_projects(category_path):
-    projects = []
-    if not category_path.exists():
-        return []
-
-    for item in category_path.iterdir():
-        if item.name.startswith("."):
-            continue
-        if item.is_dir():
-            projects.append(item)
-        elif item.suffix == ".py":
-            projects.append(item)
-    return sorted(projects, key=lambda x: x.name)
-
 def find_readme(project_path):
     if project_path.is_file():
         return None
@@ -50,95 +36,176 @@ def find_main_script(project_path):
     if project_path.is_file():
         return project_path
 
-    files = list(project_path.glob("*.py"))
+    # Support .py and .sip
+    files = list(project_path.glob("*.py")) + list(project_path.glob("*.sip"))
     if not files:
         return None
     
     # Try to find main.py or project_name.py
     for f in files:
-        if f.name == "main.py":
+        if f.name in ["main.py", "index.py", f"{project_path.name}.py", f"{project_path.name}.sip"]:
             return f
     
-    folder_name = project_path.name
-    for f in files:
-        if f.name == f"{folder_name}.py":
-            return f
-        
     return files[0]
 
-def generate_qmd(category_path, project_path):
-    readme_path = find_readme(project_path)
-    script_path = find_main_script(project_path)
+def get_projects(category_path):
+    projects = []
+    if not category_path.exists():
+        return []
+
+    # Recursive project finder; only invoked below for the OTHERS category, whose projects are deeply nested
+    def find_projects_rec(path, depth=0):
+        if depth > 10: return # Increased depth for deep SIP paths
+        try:
+            for item in path.iterdir():
+                if item.name.startswith(".") or item.name in EXCLUDE_DIRS:
+                    continue
+                if item.is_dir():
+                    # If it has a readme or a script, it's a project
+                    if find_readme(item) or any(item.glob("*.py")) or any(item.glob("*.sip")):
+                        projects.append(item)
+                    # Even if it's a project, it might have subprojects (like OTHERS/Jarvis)
+                    find_projects_rec(item, depth + 1)
+                elif item.suffix in [".py", ".sip"]:
+                    projects.append(item)
+        except PermissionError:
+            pass
+
+    if category_path.name == "OTHERS":
+        find_projects_rec(category_path)
+    else:
+        for item in category_path.iterdir():
+            if item.name.startswith(".") or item.name in EXCLUDE_DIRS:
+                continue
+            if item.is_dir():
+                projects.append(item)
+            elif item.suffix in [".py", ".sip"]:
+                projects.append(item)
     
+    # Use set to avoid duplicates and return sorted list
+    unique_projects = []
+    seen = set()
+    for p in projects:
+        if p not in seen:
+            unique_projects.append(p)
+            seen.add(p)
+    
+    return sorted(unique_projects, key=lambda x: str(x))
+
+def generate_qmd(category_path, project_path):
     # If project_path is a file, the project name is the filename without extension
     project_name = project_path.stem if project_path.is_file() else project_path.name
     
+    # For projects deep in OTHERS, use a flattened name for the file
+    try:
+        rel_to_cat = project_path.relative_to(category_path)
+        output_name = "_".join(rel_to_cat.parts).replace(".sip", "").replace(".py", "")
+    except ValueError:
+        output_name = project_name
+
+    output_dir = BOOK_DIR / "projects" / category_path.name
+    output_dir.mkdir(parents=True, exist_ok=True)
+    output_file = output_dir / f"{output_name}.qmd"
+    
+    # Skip regeneration when the output .qmd already exists
+    if output_file.exists():
+        return f"projects/{category_path.name}/{output_name}.qmd"
+
+    readme_path = find_readme(project_path)
+    script_path = find_main_script(project_path)
+    
     content = []
-    content.append(f"# {project_name}")
-    content.append("")
+    readme_content = ""
     
     if readme_path:
         try:
             with open(readme_path, "r", encoding="utf-8", errors="ignore") as f:
                 readme_content = f.read()
-                
-                # Sanitize to prevent YAML parsing errors
                 readme_content = re.sub(r'^---$', '***', readme_content, flags=re.MULTILINE)
                 
-                # Fix relative paths for images and links
                 def fix_path(match):
-                    prefix = match.group(1)
-                    path = match.group(2)
-                    suffix = match.group(3)
-                    
-                    if path.startswith(('http', '/', '#', 'mailto:')):
-                        return match.group(0)
-                    
-                    # Decode URL-encoded paths (e.g., %2F -> /)
+                    prefix, path, suffix = match.groups()
+                    if path.startswith(('http', '/', '#', 'mailto:')): return match.group(0)
                     import urllib.parse
                     decoded_path = urllib.parse.unquote(path)
-                    
-                    # New path relative to the .qmd file
-                    # .qmd files are at mybook/projects/<category>/<project_name>.qmd
-                    # To get to repo root from there, we need ../../../
-                    new_path = f"../../../{category_path.name}/{project_path.name}/{decoded_path}"
+                    new_path = f"../../../{project_path.relative_to(REPO_ROOT)}/{decoded_path}"
                     return f"{prefix}{new_path}{suffix}"
 
-                # Markdown images and links: ![alt](path) or [text](path)
                 readme_content = re.sub(r'(\!??\[.*?\]\()(.*?)(\))', fix_path, readme_content)
-                # HTML images: <img ... src="path" ... >
                 readme_content = re.sub(r'(<img.*?src=["\'])(.*?)(["\'].*?>)', fix_path, readme_content)
-
-                # Escape @ symbols to prevent Quarto from interpreting them as citations
-                # Using HTML entity &#64; is more robust across different Pandoc versions/formats
                 readme_content = re.sub(r'(?<!\\)@', r'&#64;', readme_content)
-
-                content.append(readme_content)
-                content.append("")
         except Exception as e:
             print(f"Error reading README for {project_name}: {e}")
 
+    # Detect whether the README already opens with a heading that matches the project name
+    has_title_in_readme = False
+    if readme_content:
+        norm_content = readme_content.replace('\\n', '\n')
+        lines = norm_content.splitlines()
+        # Check the first 6 non-empty lines for a title (the loop stops once count exceeds 5)
+        count = 0
+        for line in lines:
+            stripped = line.strip()
+            if not stripped: continue
+            if stripped.startswith("#"):
+                header_text = stripped.lstrip('#').strip()
+                header_text = re.sub(r'^(Title|Project|Name):\s*', '', header_text, flags=re.IGNORECASE).strip(' "\'*')
+                clean_header = re.sub(r'[^\w\s]', '', header_text).lower()
+                clean_project = re.sub(r'[^\w\s]', '', project_name).lower()
+                if clean_header == clean_project or clean_header == clean_project.replace(' ', '') or clean_project in clean_header:
+                    has_title_in_readme = True
+                    break
+            count += 1
+            if count > 5: break
+
+    if readme_content:
+        if has_title_in_readme:
+            # Keep the first H1 as the page title; demote every other line starting with '#' by one level
+            lines = readme_content.splitlines()
+            new_lines = []
+            found_first = False
+            for line in lines:
+                if line.startswith("# ") and not found_first:
+                    new_lines.append(line)
+                    found_first = True
+                elif line.startswith("#"):
+                    new_lines.append("#" + line)
+                else:
+                    new_lines.append(line)
+            readme_content = "\n".join(new_lines)
+        else:
+            # Demote all headers by one level (a leading '#' becomes '##')
+            readme_content = re.sub(r'^#', '##', readme_content, flags=re.MULTILINE)
+
+    if not has_title_in_readme:
+        content.append(f"# {project_name}")
+        content.append("")
+    
+    if readme_content:
+        content.append(readme_content)
+        content.append("")
+    else:
+        # Add a default description if README is missing
+        content.append(f"This project contains scripts related to **{project_name}**.")
+        content.append("")
+
     if script_path:
         try:
             with open(script_path, "r", encoding="utf-8", errors="ignore") as f:
                 script_content = f.read()
                 content.append(f"## Source Code: {script_path.name}")
-                content.append("```python")
+                lang = "python" if script_path.suffix == ".py" else "cpp"
+                content.append(f"```{lang}")
                 content.append(script_content)
                 content.append("```")
                 content.append("")
         except Exception as e:
             print(f"Error reading script for {project_name}: {e}")
     
-    # Output path
-    output_dir = BOOK_DIR / "projects" / category_path.name
-    output_dir.mkdir(parents=True, exist_ok=True)
-    output_file = output_dir / f"{project_name}.qmd"
-    
     with open(output_file, "w", encoding="utf-8") as f:
         f.write("\n".join(content))
         
-    return f"projects/{category_path.name}/{project_name}.qmd"
+    return f"projects/{category_path.name}/{output_name}.qmd"
 
 def main():
     if not BOOK_DIR.exists():
@@ -168,7 +235,7 @@ def main():
 
     chapters_config.append("references.qmd")
 
-    # Generate _quarto.yml using yaml.dump
+    # Generate _quarto.yml
     quarto_config = {
         "project": {
             "type": "book",