@@ -471,7 +471,9 @@ def fake_run(cmd, cwd=None, env=None):
471471 payload = {"artifacts" : {"output_triples" : {"TOTAL" : 17 }}}
472472 run_metrics_dir = metrics_dir / run_id
473473 run_metrics_dir .mkdir (parents = True , exist_ok = True )
474- (run_metrics_dir / f"conversion-metrics-{ out_name } -{ run_id } .json" ).write_text (
474+ conversion_metrics_dir = run_metrics_dir / "conversion_metrics" / out_name
475+ conversion_metrics_dir .mkdir (parents = True , exist_ok = True )
476+ (conversion_metrics_dir / run_id ).write_text (
475477 json .dumps (payload ),
476478 encoding = "utf-8" ,
477479 )
@@ -840,6 +842,58 @@ def fake_run(cmd, cwd=None, env=None):
840842 self .assertIn ("/data/in/part-00001.nt" , gzip_cmds [1 ][- 1 ])
841843 self .assertEqual (out_buf .getvalue ().count ("* Output directory:" ), 1 )
842844
def test_main_full_mode_batch_metrics_upsert_is_sample_scoped(self):
    """Batch layout upserts compression CSV metrics per sample, not per RDF part.

    The stubbed conversion step emits two ``.nt`` parts under a single
    ``sample`` directory; the compression-metrics updater must still be
    invoked exactly once, keyed by the sample name.
    """
    with tempfile.TemporaryDirectory() as scratch:
        workdir = Path(scratch)
        input_dir, rules_path = prepare_inputs(workdir)
        output_root = workdir / "out"
        recorded_names = []
        # (file name, content) for each RDF part the fake conversion leaves behind.
        part_files = (
            ("part-00000.nt", "<s1> <p> <o> .\n "),
            ("part-00001.nt", "<s2> <p> <o> .\n "),
        )

        def fake_run(cmd, cwd=None, env=None):
            # Only the conversion command has a side effect: two parts, one sample.
            if "/opt/vcf-rdfizer/run_conversion.sh" in cmd:
                sample_dir = output_root / "sample"
                sample_dir.mkdir(parents=True, exist_ok=True)
                for part_name, triples in part_files:
                    (sample_dir / part_name).write_text(triples)
            return 0

        def record_output_name(**kwargs):
            # Capture which output name each metrics upsert is keyed by.
            recorded_names.append(kwargs["output_name"])

        argv = [
            "--input",
            str(input_dir),
            "--rules",
            str(rules_path),
            "--rdf-layout",
            "batch",
            "--compression",
            "gzip",
            "--out",
            str(output_root),
            "--keep-tsv",
        ]

        original_cwd = os.getcwd()
        os.chdir(workdir)
        try:
            with mock.patch.object(vcf_rdfizer, "run", side_effect=fake_run), \
                    mock.patch.object(vcf_rdfizer, "check_docker", return_value=True), \
                    mock.patch.object(vcf_rdfizer, "docker_image_exists", return_value=True), \
                    mock.patch.object(vcf_rdfizer, "discover_tsv_triplets", return_value=mocked_triplets()), \
                    mock.patch.object(
                        vcf_rdfizer,
                        "update_metrics_csv_with_compression",
                        side_effect=record_output_name,
                    ):
                exit_code = invoke_main(argv)
        finally:
            # Always restore the working directory, even if main() raises.
            os.chdir(original_cwd)

        self.assertEqual(exit_code, 0)
        self.assertEqual(recorded_names, ["sample"])
896+
843897 def test_main_full_mode_aggregate_layout_sets_merge_flag (self ):
844898 """Aggregate layout passes AGGREGATE_RDF=1 to conversion step."""
845899 with tempfile .TemporaryDirectory () as td :
@@ -1151,10 +1205,10 @@ def fake_run(cmd, cwd=None, env=None):
11511205 self .assertIn ("sample" , csv_text )
11521206 self .assertIn ("hdt" , csv_text )
11531207
1154- json_files = list ( run_metrics_dir . glob ( "compression-metrics- sample-*.json" ))
1155- time_files = list ( run_metrics_dir . glob ( "compression-time- hdt- sample-*.txt" ))
1156- self .assertTrue (json_files )
1157- self .assertTrue (time_files )
1208+ json_file = run_metrics_dir / "compression_metrics" / " sample" / run_metrics_dir . name
1209+ time_file = run_metrics_dir / "compression_time" / " hdt" / " sample" / run_metrics_dir . name
1210+ self .assertTrue (json_file . exists () )
1211+ self .assertTrue (time_file . exists () )
11581212
11591213 def test_main_full_mode_deletes_nt_with_docker_fallback_on_permission_error (self ):
11601214 """Full mode falls back to Docker-based removal when .nt unlink raises PermissionError."""
0 commit comments