@@ -37,6 +37,7 @@
 from datetime import datetime
 from zipfile import ZipFile, BadZipFile
 from tempfile import mkdtemp
+from shutil import rmtree, copy2
 from defusedxml import ElementTree
 
 import os
@@ -261,7 +262,7 @@ def __gen_digest_file(hash_file_path, meta_file_path: str, hashtype: HashType) - |
 
 
 def handle_maven_uploading(
-    repo: str,
+    repos: List[str],
     prod_key: str,
     ignore_patterns=None,
     root="maven-repository",
@@ -294,8 +295,9 @@ def handle_maven_uploading( |
     """
     if targets is None:
         targets = []
-    # 1. extract tarball
-    tmp_root = _extract_tarball(repo, prod_key, dir__=dir_)
+
+    # 1. extract tarballs
+    tmp_root = _extract_tarballs(repos, root, prod_key, dir__=dir_)
 
     # 2. scan for paths and filter out the ignored paths,
     #    and also collect poms for later metadata generation
@@ -673,6 +675,195 @@ def _extract_tarball(repo: str, prefix="", dir__=None) -> str: |
         sys.exit(1)
 
 
+def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str:
+    """ Extract multiple zip archives into one merged temporary directory.
+        * repos is the list of archive paths to extract
+        * root is a prefix inside each archive that marks the beginning
+          of the maven GAV path
+        * prefix is the prefix for the temporary directory names
+        * dir__ is the directory where the temporary directories will be created
+
+        Returns the path of the merged temporary directory containing all extracted files
+    """
+    # Create the final merge directory
+    final_tmp_root = mkdtemp(prefix=f"charon-{prefix}-final-", dir=dir__)
+
+    total_copied = 0
+    total_duplicated = 0
+    total_merged = 0
+    total_processed = 0
+
+    # Collect all extracted directories first
+    extracted_dirs = []
+
+    for repo in repos:
+        if os.path.exists(repo):
+            try:
+                logger.info("Extracting tarball %s", repo)
+                tmp_root = mkdtemp(prefix=f"charon-{prefix}-", dir=dir__)
+                with ZipFile(repo) as repo_zip:
+                    extract_zip_all(repo_zip, tmp_root)
+                extracted_dirs.append(tmp_root)
+
+            except BadZipFile as e:
+                logger.error("Tarball extraction error for repo %s: %s", repo, e)
+                sys.exit(1)
+        else:
+            logger.error("Error: archive %s does not exist", repo)
+            sys.exit(1)
+
+    # Merge all extracted directories
+    if extracted_dirs:
+        # Create the merged directory
+        merged_dir_name = "merged_repositories"
+        merged_dest_dir = os.path.join(final_tmp_root, merged_dir_name)
+
+        # Merge the content of all extracted directories
+        for extracted_dir in extracted_dirs:
+            copied, duplicated, merged, processed = _merge_directories_with_rename(
+                extracted_dir, merged_dest_dir, root
+            )
+            total_copied += copied
+            total_duplicated += duplicated
+            total_merged += merged
+            total_processed += processed
+
+            # Clean up the temporary extraction directory
+            rmtree(extracted_dir)
+
+    logger.info(
+        "All zips merged! Total copied: %s, Total duplicated: %s, "
+        "Total merged: %s, Total processed: %s",
+        total_copied,
+        total_duplicated,
+        total_merged,
+        total_processed,
+    )
+    return final_tmp_root
+
+
+def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str):
+    """ Recursively copy files from src_dir to dest_dir, skipping files that
+        already exist in dest_dir and merging archetype catalogs.
+        * src_dir is the source directory to copy from
+        * dest_dir is the destination directory to copy to
+        * root is the subdirectory that marks the beginning of the maven GAV path
+
+        Returns Tuple of (copied_count, duplicated_count, merged_count, processed_count)
+    """
+    copied_count = 0
+    duplicated_count = 0
+    merged_count = 0
+    processed_count = 0
+
+    # Find the actual content directory: the first top-level entry that
+    # contains the root (e.g. maven-repository) subdirectory
+    content_root = src_dir
+    for item in os.listdir(src_dir):
+        item_path = os.path.join(src_dir, item)
+        maven_repo_path = os.path.join(item_path, root)
+        if os.path.isdir(item_path) and os.path.exists(maven_repo_path):
+            content_root = item_path
+            break
+
+    # pylint: disable=unused-variable
+    for root_dir, dirs, files in os.walk(content_root):
+        # Calculate the relative path from the content root
+        rel_path = os.path.relpath(root_dir, content_root)
+        dest_root = os.path.join(dest_dir, rel_path) if rel_path != '.' else dest_dir
+
+        # Create the destination directory if it doesn't exist
+        os.makedirs(dest_root, exist_ok=True)
+
+        # Copy all files, merging archetype catalogs and skipping other duplicates
+        for file in files:
+            src_file = os.path.join(root_dir, file)
+            dest_file = os.path.join(dest_root, file)
+
+            if file == ARCHETYPE_CATALOG_FILENAME:
+                _handle_archetype_catalog_merge(src_file, dest_file)
+                merged_count += 1
+                logger.debug("Merged archetype catalog: %s -> %s", src_file, dest_file)
+            elif os.path.exists(dest_file):
+                duplicated_count += 1
+                logger.debug("Duplicated: %s, skipped", dest_file)
+            else:
+                copy2(src_file, dest_file)
+                copied_count += 1
+                logger.debug("Copied: %s -> %s", src_file, dest_file)
+
+            processed_count += 1
+
+    logger.info(
+        "One zip merged! Files copied: %s, Files duplicated: %s, "
+        "Files merged: %s, Total files processed: %s",
+        copied_count,
+        duplicated_count,
+        merged_count,
+        processed_count,
+    )
+    return copied_count, duplicated_count, merged_count, processed_count
+
+
+def _handle_archetype_catalog_merge(src_catalog: str, dest_catalog: str):
+    """ Handle merging of archetype-catalog.xml files during directory merge.
+        * src_catalog is the source archetype-catalog.xml file path
+        * dest_catalog is the destination archetype-catalog.xml file path
+    """
+    try:
+        with open(src_catalog, "rb") as sf:
+            src_archetypes = _parse_archetypes(sf.read())
+    except ElementTree.ParseError as e:
+        logger.warning("Failed to read source archetype catalog %s: %s", src_catalog, e)
+        return
+
+    if len(src_archetypes) < 1:
+        logger.warning(
+            "No archetypes found in source archetype-catalog.xml: %s, "
+            "even though the file exists! Skipping.",
+            src_catalog
+        )
+        return
+
+    # Copy directly if dest_catalog doesn't exist
+    if not os.path.exists(dest_catalog):
+        copy2(src_catalog, dest_catalog)
+        return
+
+    try:
+        with open(dest_catalog, "rb") as df:
+            dest_archetypes = _parse_archetypes(df.read())
+    except ElementTree.ParseError as e:
+        logger.warning("Failed to read dest archetype catalog %s: %s", dest_catalog, e)
+        return
+
+    if len(dest_archetypes) < 1:
+        logger.warning(
+            "No archetypes found in dest archetype-catalog.xml: %s, "
+            "even though the file exists! Copying directly from the src_catalog, %s.",
+            dest_catalog, src_catalog
+        )
+        copy2(src_catalog, dest_catalog)
+        return
+
+    # Append archetypes from src that are missing in dest
+    original_dest_size = len(dest_archetypes)
+    for sa in src_archetypes:
+        if sa not in dest_archetypes:
+            dest_archetypes.append(sa)
+        else:
+            logger.debug("DUPLICATE ARCHETYPE: %s", sa)
+
+    # Regenerate the merged catalog only when new archetypes were added
+    if len(dest_archetypes) != original_dest_size:
+        content = MavenArchetypeCatalog(dest_archetypes).generate_meta_file_content()
+        try:
+            overwrite_file(dest_catalog, content)
+        except Exception as e:
+            logger.error("Failed to merge archetype catalog: %s", dest_catalog)
+            raise e
+
+
 def _scan_paths(files_root: str, ignore_patterns: List[str],
                 root: str) -> Tuple[str, List[str], List[str], List[str]]:
     # 2. scan for paths and filter out the ignored paths,
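
With this change `handle_maven_uploading` takes a list of archives instead of a single path. A minimal caller sketch, assuming the module path `charon.pkgs.maven` and leaving every other parameter at its default; the archive paths are hypothetical:

```python
# Assumed module path and archive locations; every other parameter of
# handle_maven_uploading keeps its default here.
from charon.pkgs.maven import handle_maven_uploading

handle_maven_uploading(
    repos=[
        "/tmp/product-a-1.0.zip",
        "/tmp/product-b-1.0.zip",
    ],
    prod_key="product-1.0",
    root="maven-repository",  # prefix marking the GAV path inside each archive
)
```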
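
The merge semantics of `_extract_tarballs` (the first archive wins on duplicate paths, unique paths from every archive survive) can be checked with a small test-style sketch; the module path and the ZIP layouts below are assumptions for illustration:

```python
import os
import zipfile

# Assumed module path; _extract_tarballs is the helper added above.
from charon.pkgs.maven import _extract_tarballs


def make_zip(path, entries):
    # Build a small ZIP that mimics a product tarball layout
    with zipfile.ZipFile(path, "w") as zf:
        for name, data in entries.items():
            zf.writestr(name, data)


make_zip("/tmp/a.zip", {
    "prod-a/maven-repository/org/foo/1.0/foo-1.0.jar": "from-a",
})
make_zip("/tmp/b.zip", {
    "prod-b/maven-repository/org/foo/1.0/foo-1.0.jar": "from-b",  # duplicate
    "prod-b/maven-repository/org/bar/2.0/bar-2.0.jar": "from-b",
})

merged = _extract_tarballs(["/tmp/a.zip", "/tmp/b.zip"], "maven-repository")
gav_root = os.path.join(merged, "merged_repositories", "maven-repository", "org")
print(sorted(os.listdir(gav_root)))  # ['bar', 'foo']
with open(os.path.join(gav_root, "foo", "1.0", "foo-1.0.jar")) as f:
    print(f.read())  # 'from-a': the first archive won
```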
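
Likewise for the catalog handling: `_handle_archetype_catalog_merge` unions the archetypes of two catalogs and rewrites the destination only when something new was added. A sketch under the assumption that `_parse_archetypes` reads the standard Maven `archetype-catalog.xml` shape; the paths are again hypothetical:

```python
import os

# Assumed module path; the XML uses the standard Maven archetype-catalog shape.
from charon.pkgs.maven import _handle_archetype_catalog_merge

CATALOG = ("<archetype-catalog><archetypes>{entries}</archetypes>"
           "</archetype-catalog>")
ENTRY = ("<archetype><groupId>{g}</groupId><artifactId>{a}</artifactId>"
         "<version>{v}</version></archetype>")

os.makedirs("/tmp/src", exist_ok=True)
os.makedirs("/tmp/dest", exist_ok=True)
# src has (quick, web); dest has (quick, cli), so "quick" is the duplicate
with open("/tmp/src/archetype-catalog.xml", "w") as f:
    f.write(CATALOG.format(entries=ENTRY.format(g="org.foo", a="quick", v="1.0")
                           + ENTRY.format(g="org.foo", a="web", v="1.0")))
with open("/tmp/dest/archetype-catalog.xml", "w") as f:
    f.write(CATALOG.format(entries=ENTRY.format(g="org.foo", a="quick", v="1.0")
                           + ENTRY.format(g="org.bar", a="cli", v="2.0")))

_handle_archetype_catalog_merge(
    "/tmp/src/archetype-catalog.xml", "/tmp/dest/archetype-catalog.xml"
)
# dest now lists quick, cli and web exactly once; the duplicate "quick"
# is logged at debug level and dropped.
```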