@@ -31,13 +31,21 @@ def info(message: str) -> None:
3131 print (f"[git-export] { message } " , flush = True )
3232
3333
34- def run_git (git_bin : str , args : list [str ], cwd : Path | None = None , verbose : bool = False ) -> None :
34+ def run_git (
35+ git_bin : str , args : list [str ], cwd : Path | None = None , verbose : bool = False
36+ ) -> None :
3537 cmd = [git_bin , * args ]
3638 if verbose :
3739 location = str (cwd ) if cwd else os .getcwd ()
3840 print (f"+ (cwd={ location } ) { ' ' .join (cmd )} " )
3941 try :
40- subprocess .run (cmd , cwd = str (cwd ) if cwd else None , check = True , text = True , capture_output = True )
42+ subprocess .run (
43+ cmd ,
44+ cwd = str (cwd ) if cwd else None ,
45+ check = True ,
46+ text = True ,
47+ capture_output = True ,
48+ )
4149 except subprocess .CalledProcessError as e :
4250 stderr = (e .stderr or "" ).strip ()
4351 stdout = (e .stdout or "" ).strip ()
@@ -65,14 +73,16 @@ def parse_github_directory_url(url: str) -> tuple[str, str, str | None]:
6573 - https://github.com/org/repo/blob/main/lang/ruby
6674 """
6775 parsed = urllib .parse .urlparse (url )
68- if parsed .scheme not in ("http" , "https" ) or parsed .netloc not in ("github.com" , "www.github.com" ):
76+ if parsed .scheme not in ("http" , "https" ) or parsed .netloc not in (
77+ "github.com" ,
78+ "www.github.com" ,
79+ ):
6980 raise GitExportError (f"Not a supported GitHub URL: { url } " )
7081
7182 parts = [p for p in parsed .path .split ("/" ) if p ]
7283 if len (parts ) < 3 :
7384 raise GitExportError (
74- "GitHub URL must include a directory path after owner/repo "
75- f"(got: { url } )"
85+ f"GitHub URL must include a directory path after owner/repo (got: { url } )"
7686 )
7787
7888 owner = parts [0 ]
@@ -87,8 +97,7 @@ def parse_github_directory_url(url: str) -> tuple[str, str, str | None]:
8797 if rest [0 ] in ("tree" , "blob" ):
8898 if len (rest ) < 3 :
8999 raise GitExportError (
90- "tree/blob URLs must include ref and directory path, "
91- f"got: { url } "
100+ f"tree/blob URLs must include ref and directory path, got: { url } "
92101 )
93102 ref = rest [1 ]
94103 source = "/" .join (rest [2 :])
@@ -171,8 +180,18 @@ def export_directory(
171180
172181 info ("Step 2/6: configuring sparse checkout" )
173182 step_start = time .perf_counter ()
174- run_git (git_bin , ["sparse-checkout" , "init" , "--cone" ], cwd = clone_dir , verbose = verbose )
175- run_git (git_bin , ["sparse-checkout" , "set" , "--" , source_path ], cwd = clone_dir , verbose = verbose )
183+ run_git (
184+ git_bin ,
185+ ["sparse-checkout" , "init" , "--cone" ],
186+ cwd = clone_dir ,
187+ verbose = verbose ,
188+ )
189+ run_git (
190+ git_bin ,
191+ ["sparse-checkout" , "set" , "--" , source_path ],
192+ cwd = clone_dir ,
193+ verbose = verbose ,
194+ )
176195 info (f"Step 2/6 complete in { time .perf_counter () - step_start :.1f} s" )
177196
178197 info ("Step 3/6: checking out requested ref/path" )
@@ -184,7 +203,12 @@ def export_directory(
184203 cwd = clone_dir ,
185204 verbose = verbose ,
186205 )
187- run_git (git_bin , ["checkout" , "--detach" , "FETCH_HEAD" ], cwd = clone_dir , verbose = verbose )
206+ run_git (
207+ git_bin ,
208+ ["checkout" , "--detach" , "FETCH_HEAD" ],
209+ cwd = clone_dir ,
210+ verbose = verbose ,
211+ )
188212 else :
189213 run_git (git_bin , ["checkout" ], cwd = clone_dir , verbose = verbose )
190214 info (f"Step 3/6 complete in { time .perf_counter () - step_start :.1f} s" )
@@ -215,114 +239,68 @@ def export_directory(
215239 copy_entry (child , output_dir / child .name )
216240 info (f"Step 6/6 complete in { time .perf_counter () - step_start :.1f} s" )
217241
218- # Explicitly ensure .git is never left in output.
219242 info ("Finalizing export (removing .git if present)" )
220243 shutil .rmtree (output_dir / ".git" , ignore_errors = True )
221- info (f"All done in { time .perf_counter () - start_total :.1f} s" )
244+ info (f"Export complete in { time .perf_counter () - start_total :.1f} s" )
222245
223246
224- def main () -> int :
def _positive_int(value: str) -> int:
    """Convert *value* to an ``int`` that is at least 1 (argparse ``type``).

    Raises:
        argparse.ArgumentTypeError: if *value* is not an integer or is < 1.
            argparse turns this into a usage error (exit status 2).
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError("must be >= 1")
    return number


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the export tool.

    Positional arguments:
        source: GitHub directory URL or repository URL.
        output: Output directory path.

    Options (attribute names on the parsed namespace are unchanged:
    ``ref``, ``path``, ``depth``, ``git``, ``force``, ``verbose``):
        --ref / -r: Git ref to check out (default ``None``).
        --path: Directory inside the repository.
        --depth: Clone depth, validated to be >= 1 (default 1).
        --git / --git-bin: Git executable to run (default ``"git"``).
        --force / -f: Overwrite the output directory if it exists.
        --verbose / -v: Print git commands.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        description="Export a directory from a GitHub repository"
    )
    parser.add_argument("source", help="GitHub directory URL or repo URL")
    parser.add_argument("output", help="Output directory path")
    # Short aliases are kept so invocations written for the previous CLI
    # (-r / -f / -v) keep working.
    parser.add_argument("--ref", "-r", default=None, help="Git ref to checkout")
    parser.add_argument(
        "--path",
        default=None,
        help="Directory path inside the repo (required for raw repo URLs)",
    )
    # Reject 0 / negative depths at parse time instead of handing them to git.
    parser.add_argument(
        "--depth",
        type=_positive_int,
        default=1,
        help="Clone depth (default: 1)",
    )
    # --git-bin is the previous spelling of this option; both store to ``git``.
    parser.add_argument(
        "--git",
        "--git-bin",
        dest="git",
        default="git",
        help="Git binary to use (default: git)",
    )
    parser.add_argument(
        "--force", "-f", action="store_true", help="Overwrite output if it exists"
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true", help="Print git commands"
    )
    return parser
283266
284- args = parser .parse_args ()
285267
286- if args .depth < 1 :
287- print ("Error: --depth must be >= 1" , file = sys .stderr )
288- return 2
def main(argv: list[str] | None = None) -> int:
    """Run the export command-line interface and return an exit status.

    Source resolution:
      * When ``--path`` is given, ``source`` is used as the repository URL
        as-is and ``--path`` names the directory to export. This is the only
        way to export from a bare repository URL, including one hosted on
        GitHub.
      * Otherwise ``source`` must be a GitHub directory URL; it is split into
        repository URL, directory path and optional ref by
        ``parse_github_directory_url``. An explicit ``--ref`` overrides the
        ref found in the URL.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) lets argparse read ``sys.argv[1:]``.

    Returns:
        0 on success, 2 when the export fails with ``GitExportError``,
        1 when a ``FileNotFoundError`` escapes the export (typically the git
        executable could not be started).
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    output_dir = Path(args.output)

    try:
        if args.path:
            # An explicit --path always wins, so bare repo URLs on github.com
            # are not forced through the directory-URL parser.
            repo_url = args.source
            source_path = normalize_source_path(args.path)
            ref = args.ref
        else:
            # Same scheme/host acceptance as parse_github_directory_url, so
            # http:// and www.github.com URLs are recognised here as well.
            parsed = urllib.parse.urlparse(args.source)
            is_github = parsed.scheme in ("http", "https") and parsed.netloc in (
                "github.com",
                "www.github.com",
            )
            if not is_github:
                raise GitExportError(
                    "--path is required when source is not a GitHub directory URL"
                )
            repo_url, source_path, inferred_ref = parse_github_directory_url(
                args.source
            )
            ref = args.ref if args.ref is not None else inferred_ref

        export_directory(
            repo_url=repo_url,
            source_path=source_path,
            output_dir=output_dir,
            ref=ref,
            depth=args.depth,
            force=args.force,
            git_bin=args.git,
            verbose=args.verbose,
        )
    except GitExportError as e:
        print(f"Error: {e}", file=sys.stderr)
        return 2
    except FileNotFoundError as e:
        # subprocess raises FileNotFoundError when the executable does not
        # exist; report it instead of letting a traceback escape. Status 1
        # matches what the uncaught exception produced.
        # NOTE(review): assumes run_git does not already translate this error.
        print(
            f"Error: unable to execute git binary '{args.git}': {e}",
            file=sys.stderr,
        )
        return 1
    return 0
325303
326304
if __name__ == "__main__":
    # Script entry point: pass the CLI arguments (program name stripped) to
    # main() and use its return value as the process exit status.
    sys.exit(main(sys.argv[1:]))
0 commit comments