Skip to content

Commit c0b3ce7

Browse files
committed
Add archetype catalog file merging logic for merged zips
1 parent 45d3210 commit c0b3ce7

File tree

2 files changed

+82
-6
lines changed

2 files changed

+82
-6
lines changed

charon/pkgs/maven.py

Lines changed: 81 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,7 @@ def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str
690690

691691
total_copied = 0
692692
total_duplicated = 0
693+
total_merged = 0
693694
total_processed = 0
694695

695696
# Collect all extracted directories first
@@ -719,20 +720,22 @@ def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str
719720

720721
# Merge content from all extracted directories
721722
for extracted_dir in extracted_dirs:
722-
copied, duplicated, processed = _merge_directories_with_rename(
723+
copied, duplicated, merged, processed = _merge_directories_with_rename(
723724
extracted_dir, merged_dest_dir, root
724725
)
725726
total_copied += copied
726727
total_duplicated += duplicated
728+
total_merged += merged
727729
total_processed += processed
728730

729731
# Clean up temporary extraction directory
730732
rmtree(extracted_dir)
731733

732734
logger.info(
733-
"All zips merged! Total copied: %s, Total duplicated: %s, Total processed: %s",
735+
"All zips merged! Total copied: %s, Total duplicated: %s, Total merged: %s, Total processed: %s",
734736
total_copied,
735737
total_duplicated,
738+
total_merged,
736739
total_processed,
737740
)
738741
return final_tmp_root
@@ -743,10 +746,11 @@ def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str):
743746
* src_dir is the source directory to copy from
744747
* dest_dir is the destination directory to copy to.
745748
746-
Returns Tuple of (copied_count, duplicated_count, processed_count)
749+
Returns Tuple of (copied_count, duplicated_count, merged_count, processed_count)
747750
"""
748751
copied_count = 0
749752
duplicated_count = 0
753+
merged_count = 0
750754
processed_count = 0
751755

752756
# Find the actual content directory
@@ -772,23 +776,94 @@ def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str):
772776
for file in files:
773777
src_file = os.path.join(root_dir, file)
774778
dest_file = os.path.join(dest_root, file)
779+
780+
if file == ARCHETYPE_CATALOG_FILENAME:
781+
_handle_archetype_catalog_merge(src_file, dest_file)
782+
merged_count += 1
783+
logger.debug("Merged archetype catalog: %s -> %s", src_file, dest_file)
775784
if os.path.exists(dest_file):
776785
duplicated_count += 1
777786
logger.debug("Duplicated: %s, skipped", dest_file)
778787
else:
779-
copied_count += 1
780788
copy2(src_file, dest_file)
789+
copied_count += 1
781790
logger.debug("Copied: %s -> %s", src_file, dest_file)
782791

783792
processed_count += 1
784793

785794
logger.info(
786-
"One zip merged! Files copied: %s, Files duplicated: %s, Total files processed: %s",
795+
"One zip merged! Files copied: %s, Files duplicated: %s, Files merged: %s, Total files processed: %s",
787796
copied_count,
788797
duplicated_count,
798+
merged_count,
789799
processed_count,
790800
)
791-
return copied_count, duplicated_count, processed_count
801+
return copied_count, duplicated_count, merged_count, processed_count
802+
803+
804+
def _handle_archetype_catalog_merge(src_catalog: str, dest_catalog: str):
    """
    Handle merging of archetype-catalog.xml files during directory merge.

    Behavior, in order:
      * If the source catalog cannot be parsed, or contains no archetypes,
        a warning is logged and nothing is changed.
      * If the destination catalog does not exist, the source is copied
        to the destination as-is.
      * If the destination catalog cannot be parsed, a warning is logged
        and the destination is left untouched.
      * If the destination catalog contains no archetypes, it is replaced
        by a copy of the source catalog.
      * Otherwise every source archetype not already present in the
        destination is appended, and the destination is rewritten only
        when at least one archetype was added.

    Args:
        src_catalog: Source archetype-catalog.xml file path
        dest_catalog: Destination archetype-catalog.xml file path

    Raises:
        FileNotFoundError: re-raised from overwrite_file when the merged
            catalog cannot be written.
    """
    try:
        with open(src_catalog, "rb") as sf:
            src_archetypes = _parse_archetypes(sf.read())
    except ElementTree.ParseError as e:
        logger.warning("Failed to read source archetype catalog %s: %s", src_catalog, e)
        return

    if not src_archetypes:
        logger.warning(
            "No archetypes found in source archetype-catalog.xml: %s, "
            "even though the file exists! Skipping.",
            src_catalog
        )
        return

    # Copy directly if dest_catalog doesn't exist
    if not os.path.exists(dest_catalog):
        copy2(src_catalog, dest_catalog)
        return

    try:
        with open(dest_catalog, "rb") as df:
            dest_archetypes = _parse_archetypes(df.read())
    except ElementTree.ParseError as e:
        logger.warning("Failed to read dest archetype catalog %s: %s", dest_catalog, e)
        return

    if not dest_archetypes:
        logger.warning(
            "No archetypes found in dest archetype-catalog.xml: %s, "
            "even though the file exists! Copy directly from the src_catalog, %s.",
            dest_catalog, src_catalog
        )
        copy2(src_catalog, dest_catalog)
        return

    original_dest_size = len(dest_archetypes)
    for sa in src_archetypes:
        if sa not in dest_archetypes:
            dest_archetypes.append(sa)
        else:
            logger.warning("DUPLICATE ARCHETYPE: %s. ", sa)

    # Nothing new was appended, so the existing destination is already correct.
    if len(dest_archetypes) == original_dest_size:
        return

    # Build the merged content BEFORE touching the destination file. The
    # previous code opened dest_catalog in 'wb' mode first, which truncated
    # the existing catalog (through a handle that was never written to) and
    # would leave it empty if content generation or the write failed.
    content = MavenArchetypeCatalog(dest_archetypes).generate_meta_file_content()
    try:
        # NOTE(review): overwrite_file is assumed to open and write
        # dest_catalog itself -- confirm against its definition.
        overwrite_file(dest_catalog, content)
    except FileNotFoundError:
        logger.error(
            "Error: Can not create file %s because of some missing folders",
            dest_catalog,
        )
        raise
792867

793868

794869
def _scan_paths(files_root: str, ignore_patterns: List[str],

tests/test_maven_upload.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ def test_multi_zips_upload(self):
168168

169169
catalog = self.test_bucket.Object(ARCHETYPE_CATALOG)
170170
cat_content = str(catalog.get()["Body"].read(), "utf-8")
171+
self.assertIn("<version>4.5.7</version>", cat_content)
171172
self.assertIn("<version>4.5.9</version>", cat_content)
172173
self.assertIn("<artifactId>httpclient</artifactId>", cat_content)
173174
self.assertIn("<groupId>org.apache.httpcomponents</groupId>", cat_content)

0 commit comments

Comments
 (0)