# Entry names (directories and repo-level files) that project discovery skips.
# "OTHERS" is intentionally not listed: get_projects() gives a category with
# that name its own recursive scan.
EXCLUDE_DIRS = {
    ".git", ".github", ".pixi", "mybook", "__pycache__", ".ipynb_checkpoints",
    ".vscode", ".idea", "venv", "env", "node_modules", "site-packages",
    "gemini chatbot", "generate_book.py", "LICENSE", "README.md",
    "CONTRIBUTING.md", "CODE_OF_CONDUCT.md", "pixi.lock", "pixi.toml",
    "file.txt",
}
1515
1616def get_categories ():
@@ -24,20 +24,6 @@ def get_categories():
2424 categories .append (item )
2525 return sorted (categories , key = lambda x : x .name )
2626
27- def get_projects (category_path ):
28- projects = []
29- if not category_path .exists ():
30- return []
31-
32- for item in category_path .iterdir ():
33- if item .name .startswith ("." ):
34- continue
35- if item .is_dir ():
36- projects .append (item )
37- elif item .suffix == ".py" :
38- projects .append (item )
39- return sorted (projects , key = lambda x : x .name )
40-
4127def find_readme (project_path ):
4228 if project_path .is_file ():
4329 return None
def find_main_script(project_path):
    """Return the script that best represents a project, or ``None``.

    Args:
        project_path: ``pathlib.Path`` of a project. A file is treated as a
            single-script project; a directory is searched (non-recursively)
            for ``*.py`` and ``*.sip`` files.

    Returns:
        ``project_path`` itself when it is a file. For a directory, the first
        existing candidate in this priority order: ``main.py``, ``index.py``,
        ``<folder>.py``, ``<folder>.sip``; otherwise the alphabetically first
        ``.py`` file, then the alphabetically first ``.sip`` file. ``None``
        when the directory contains no such files.
    """
    if project_path.is_file():
        return project_path

    # Support .py and .sip. Each group is sorted so the fallback below does
    # not depend on the order the filesystem happens to list entries in.
    files = sorted(project_path.glob("*.py")) + sorted(project_path.glob("*.sip"))
    if not files:
        return None

    # Walk the preferred names in priority order (rather than the files in
    # directory order) so main.py always wins over <folder>.py when both exist.
    by_name = {f.name: f for f in files}
    preferred = (
        "main.py",
        "index.py",
        f"{project_path.name}.py",
        f"{project_path.name}.sip",
    )
    for candidate in preferred:
        if candidate in by_name:
            return by_name[candidate]

    return files[0]
6850
69- def generate_qmd (category_path , project_path ):
70- readme_path = find_readme (project_path )
71- script_path = find_main_script (project_path )
def get_projects(category_path):
    """Collect the project paths that belong to one category directory.

    A category named ``OTHERS`` is walked recursively (recursion stops once
    the depth exceeds 10): a sub-directory counts as a project when
    ``find_readme`` finds a README in it or it directly contains a ``.py`` or
    ``.sip`` file, and it is descended into either way because a project may
    hold sub-projects. For any other category only the direct children are
    considered, and every non-excluded directory is a project.

    In both modes loose ``.py``/``.sip`` files are projects, and entries whose
    name starts with ``.`` or is listed in ``EXCLUDE_DIRS`` are ignored.

    Args:
        category_path: ``pathlib.Path`` of the category directory.

    Returns:
        A list of unique paths sorted by their string form. Empty when
        ``category_path`` is missing or is not a directory.
    """
    # exists() alone would let a plain file through and iterdir() would then
    # raise NotADirectoryError.
    if not category_path.is_dir():
        return []

    recursive = category_path.name == "OTHERS"
    max_depth = 10  # generous limit for deeply nested SIP paths
    found = set()   # a set keeps the result free of duplicates

    def scan(path, depth=0):
        if depth > max_depth:
            return
        try:
            for item in path.iterdir():
                if item.name.startswith(".") or item.name in EXCLUDE_DIRS:
                    continue
                if item.is_dir():
                    if not recursive:
                        found.add(item)
                        continue
                    # If it has a readme or a script, it's a project
                    if (
                        find_readme(item)
                        or any(item.glob("*.py"))
                        or any(item.glob("*.sip"))
                    ):
                        found.add(item)
                    # Even a project may contain sub-projects, so keep descending.
                    scan(item, depth + 1)
                elif item.suffix in (".py", ".sip"):
                    found.add(item)
        except PermissionError:
            # Best effort: an unreadable directory is reported and skipped
            # instead of aborting the whole scan.
            print(f"Skipping unreadable directory: {path}")

    scan(category_path)
    return sorted(found, key=str)
94+
def _rewrite_readme_links(readme_content, link_root):
    """Re-root relative Markdown/HTML link targets in a README at *link_root*."""
    import urllib.parse  # local import: only this helper needs it

    def fix_path(match):
        prefix, path, suffix = match.groups()
        # Absolute URLs, root-relative paths, in-page anchors and mail links
        # already resolve correctly from the generated page.
        if path.startswith(('http', '/', '#', 'mailto:')):
            return match.group(0)
        # Decode URL-encoded paths (e.g., %2F -> /)
        decoded_path = urllib.parse.unquote(path)
        return f"{prefix}{link_root}/{decoded_path}{suffix}"

    # Markdown images and links: ![alt](path) or [text](path)
    readme_content = re.sub(r'(\!??\[.*?\]\()(.*?)(\))', fix_path, readme_content)
    # HTML images: <img ... src="path" ... >
    return re.sub(r'(<img.*?src=["\'])(.*?)(["\'].*?>)', fix_path, readme_content)


def _readme_has_title(readme_content, project_name):
    """Tell whether an early README heading already names the project."""
    clean_project = re.sub(r'[^\w\s]', '', project_name).lower()
    # Literal "\n" sequences are treated as line breaks for this check only.
    lines = readme_content.replace('\\n', '\n').splitlines()
    count = 0
    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue
        if stripped.startswith("#"):
            header_text = stripped.lstrip('#').strip()
            header_text = re.sub(
                r'^(Title|Project|Name):\s*', '', header_text, flags=re.IGNORECASE
            ).strip(' "\'*')
            clean_header = re.sub(r'[^\w\s]', '', header_text).lower()
            if (
                clean_header == clean_project
                or clean_header == clean_project.replace(' ', '')
                # An empty name is a substring of everything; require real text.
                or (clean_project and clean_project in clean_header)
            ):
                return True
        # Only the top of the file is inspected: stop after six non-empty lines.
        count += 1
        if count > 5:
            break
    return False


def _demote_headings(readme_content, keep_first_h1):
    """Push README headings one level down, leaving fenced code untouched.

    Returns ``(text, kept_h1)``. ``kept_h1`` is True when *keep_first_h1* was
    requested and a ``# `` heading was found and left at level 1.
    """
    out = []
    fence = None      # marker of the fenced code block we are inside, if any
    kept_h1 = False
    # Splitting on "\n" (not splitlines) keeps every other character intact.
    for line in readme_content.split("\n"):
        marker = line.lstrip()[:3]
        if fence is not None:
            if marker == fence:
                fence = None
            out.append(line)
            continue
        if marker in ("```", "~~~"):
            fence = marker
            out.append(line)
            continue
        if line.startswith("#"):
            if keep_first_h1 and not kept_h1 and line.startswith("# "):
                kept_h1 = True
            else:
                line = "#" + line
        out.append(line)
    return "\n".join(out), kept_h1


def generate_qmd(category_path, project_path):
    """Write the ``.qmd`` page for one project and return its book-relative path.

    The page consists of a level-1 title, the project's README (or a one-line
    placeholder when there is none) and the source of the script chosen by
    ``find_main_script``. The README is adapted for the book: ``---`` rules
    become ``***``, relative links are re-rooted, ``@`` is escaped, and
    headings are demoted so the page has a single H1.

    An already existing output file is left untouched and only its path is
    returned.

    Args:
        category_path: ``pathlib.Path`` of the category containing the project.
        project_path: ``pathlib.Path`` of the project (directory or script file).

    Returns:
        ``"projects/<category>/<output_name>.qmd"``, where ``output_name`` is
        the project's path relative to the category flattened with ``_``.
    """
    # If project_path is a file, the project name is the filename without extension
    project_name = project_path.stem if project_path.is_file() else project_path.name

    # For projects nested below the category, use a flattened name for the file
    try:
        rel_to_cat = project_path.relative_to(category_path)
        output_name = "_".join(rel_to_cat.parts).replace(".sip", "").replace(".py", "")
    except ValueError:
        output_name = project_name

    chapter_ref = f"projects/{category_path.name}/{output_name}.qmd"
    output_dir = BOOK_DIR / "projects" / category_path.name
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / f"{output_name}.qmd"

    # Pages that already exist are not regenerated.
    if output_file.exists():
        return chapter_ref

    readme_path = find_readme(project_path)
    script_path = find_main_script(project_path)

    readme_content = ""
    if readme_path:
        try:
            with open(readme_path, "r", encoding="utf-8", errors="ignore") as f:
                readme_content = f.read()
        except OSError as e:
            print(f"Error reading README for {project_name}: {e}")

    kept_h1 = False
    if readme_content:
        # Sanitize to prevent YAML parsing errors
        readme_content = re.sub(r'^---$', '***', readme_content, flags=re.MULTILINE)

        # Link targets are written relative to the generated page, which
        # climbs three levels ("../../../") before descending into the project.
        try:
            project_rel = project_path.relative_to(REPO_ROOT).as_posix()
        except ValueError:
            # Project outside REPO_ROOT: fall back to <category>/<project>.
            project_rel = f"{category_path.name}/{project_path.name}"
        readme_content = _rewrite_readme_links(readme_content, f"../../../{project_rel}")

        # Escape @ symbols to prevent Quarto from interpreting them as
        # citations; the HTML entity form is used for the replacement.
        readme_content = re.sub(r'(?<!\\)@', r'&#64;', readme_content)

        has_title_in_readme = _readme_has_title(readme_content, project_name)
        readme_content, kept_h1 = _demote_headings(
            readme_content, keep_first_h1=has_title_in_readme
        )

    content = []
    # Emit our own title unless the README's H1 was kept, so every page has
    # exactly one level-1 heading.
    if not kept_h1:
        content.append(f"# {project_name}")
        content.append("")

    if readme_content:
        content.append(readme_content)
        content.append("")
    else:
        # Add a default description if README is missing
        content.append(f"This project contains scripts related to **{project_name}**.")
        content.append("")

    if script_path:
        try:
            with open(script_path, "r", encoding="utf-8", errors="ignore") as f:
                script_content = f.read()
        except OSError as e:
            print(f"Error reading script for {project_name}: {e}")
        else:
            content.append(f"## Source Code: {script_path.name}")
            lang = "python" if script_path.suffix == ".py" else "cpp"
            content.append(f"```{lang}")
            content.append(script_content)
            content.append("```")
            content.append("")

    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(content))

    return chapter_ref
142209
143210def main ():
144211 if not BOOK_DIR .exists ():
@@ -168,7 +235,7 @@ def main():
168235
169236 chapters_config .append ("references.qmd" )
170237
171- # Generate _quarto.yml using yaml.dump
238+ # Generate _quarto.yml
172239 quarto_config = {
173240 "project" : {
174241 "type" : "book" ,
0 commit comments