Skip to content

Commit cd7ed7a

Browse files
committed
Add archetype catalog file merging logic for merged zips
1 parent 45d3210 commit cd7ed7a

File tree

2 files changed

+84
-6
lines changed

2 files changed

+84
-6
lines changed

charon/pkgs/maven.py

Lines changed: 83 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,7 @@ def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str
690690

691691
total_copied = 0
692692
total_duplicated = 0
693+
total_merged = 0
693694
total_processed = 0
694695

695696
# Collect all extracted directories first
@@ -719,20 +720,23 @@ def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str
719720

720721
# Merge content from all extracted directories
721722
for extracted_dir in extracted_dirs:
722-
copied, duplicated, processed = _merge_directories_with_rename(
723+
copied, duplicated, merged, processed = _merge_directories_with_rename(
723724
extracted_dir, merged_dest_dir, root
724725
)
725726
total_copied += copied
726727
total_duplicated += duplicated
728+
total_merged += merged
727729
total_processed += processed
728730

729731
# Clean up temporary extraction directory
730732
rmtree(extracted_dir)
731733

732734
logger.info(
733-
"All zips merged! Total copied: %s, Total duplicated: %s, Total processed: %s",
735+
"All zips merged! Total copied: %s, Total duplicated: %s, "
736+
"Total merged: %s, Total processed: %s",
734737
total_copied,
735738
total_duplicated,
739+
total_merged,
736740
total_processed,
737741
)
738742
return final_tmp_root
@@ -743,10 +747,11 @@ def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str):
743747
* src_dir is the source directory to copy from
744748
* dest_dir is the destination directory to copy to.
745749
746-
Returns Tuple of (copied_count, duplicated_count, processed_count)
750+
Returns Tuple of (copied_count, duplicated_count, merged_count, processed_count)
747751
"""
748752
copied_count = 0
749753
duplicated_count = 0
754+
merged_count = 0
750755
processed_count = 0
751756

752757
# Find the actual content directory
@@ -772,23 +777,95 @@ def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str):
772777
for file in files:
773778
src_file = os.path.join(root_dir, file)
774779
dest_file = os.path.join(dest_root, file)
780+
781+
if file == ARCHETYPE_CATALOG_FILENAME:
782+
_handle_archetype_catalog_merge(src_file, dest_file)
783+
merged_count += 1
784+
logger.debug("Merged archetype catalog: %s -> %s", src_file, dest_file)
775785
if os.path.exists(dest_file):
776786
duplicated_count += 1
777787
logger.debug("Duplicated: %s, skipped", dest_file)
778788
else:
779-
copied_count += 1
780789
copy2(src_file, dest_file)
790+
copied_count += 1
781791
logger.debug("Copied: %s -> %s", src_file, dest_file)
782792

783793
processed_count += 1
784794

785795
logger.info(
786-
"One zip merged! Files copied: %s, Files duplicated: %s, Total files processed: %s",
796+
"One zip merged! Files copied: %s, Files duplicated: %s, "
797+
"Files merged: %s, Total files processed: %s",
787798
copied_count,
788799
duplicated_count,
800+
merged_count,
789801
processed_count,
790802
)
791-
return copied_count, duplicated_count, processed_count
803+
return copied_count, duplicated_count, merged_count, processed_count
804+
805+
806+
def _handle_archetype_catalog_merge(src_catalog: str, dest_catalog: str):
    """
    Handle merging of archetype-catalog.xml files during directory merge.

    Outcomes, in the order they are checked:

    * The source cannot be parsed, or holds no archetypes: a warning is
      logged and the destination is left untouched.
    * The destination does not exist: the source is copied to it as-is.
    * The destination cannot be parsed: a warning is logged and the
      destination is left untouched.
    * The destination holds no archetypes: it is replaced by a copy of
      the source.
    * Otherwise every source archetype not already present in the
      destination is appended, and the destination file is regenerated
      only when at least one archetype was actually added.

    Args:
        src_catalog: Source archetype-catalog.xml file path
        dest_catalog: Destination archetype-catalog.xml file path

    Raises:
        FileNotFoundError: re-raised (after logging) when writing the
            regenerated destination catalog fails with this error.
    """
    try:
        with open(src_catalog, "rb") as sf:
            src_archetypes = _parse_archetypes(sf.read())
    except ElementTree.ParseError as e:
        logger.warning("Failed to read source archetype catalog %s: %s", src_catalog, e)
        return

    if not src_archetypes:
        logger.warning(
            "No archetypes found in source archetype-catalog.xml: %s, "
            "even though the file exists! Skipping.",
            src_catalog
        )
        return

    # Copy directly if dest_catalog doesn't exist
    if not os.path.exists(dest_catalog):
        copy2(src_catalog, dest_catalog)
        return

    try:
        with open(dest_catalog, "rb") as df:
            dest_archetypes = _parse_archetypes(df.read())
    except ElementTree.ParseError as e:
        logger.warning("Failed to read dest archetype catalog %s: %s", dest_catalog, e)
        return

    if not dest_archetypes:
        logger.warning(
            "No archetypes found in dest archetype-catalog.xml: %s, "
            "even though the file exists! Copy directly from the src_catalog, %s.",
            dest_catalog, src_catalog
        )
        copy2(src_catalog, dest_catalog)
        return

    # Append only the archetypes the destination does not have yet.
    added = False
    for sa in src_archetypes:
        if sa in dest_archetypes:
            logger.debug("DUPLICATE ARCHETYPE: %s", sa)
        else:
            dest_archetypes.append(sa)
            added = True

    if not added:
        # Nothing new: keep the existing destination file byte-for-byte.
        return

    # Build the new content BEFORE touching the destination file, so a failure
    # while generating it cannot leave a truncated (empty) catalog behind.
    content = MavenArchetypeCatalog(dest_archetypes).generate_meta_file_content()
    try:
        overwrite_file(dest_catalog, content)
    except FileNotFoundError:
        logger.error(
            "Error: Can not create file %s because of some missing folders",
            dest_catalog,
        )
        raise
792869

793870

794871
def _scan_paths(files_root: str, ignore_patterns: List[str],

tests/test_maven_upload.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ def test_multi_zips_upload(self):
168168

169169
catalog = self.test_bucket.Object(ARCHETYPE_CATALOG)
170170
cat_content = str(catalog.get()["Body"].read(), "utf-8")
171+
self.assertIn("<version>4.5.6</version>", cat_content)
171172
self.assertIn("<version>4.5.9</version>", cat_content)
172173
self.assertIn("<artifactId>httpclient</artifactId>", cat_content)
173174
self.assertIn("<groupId>org.apache.httpcomponents</groupId>", cat_content)

0 commit comments

Comments
 (0)