diff --git a/README.md b/README.md index 65f445d..1a2ecb8 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,56 @@ search engine can be when built with the high-level helpers in `openms_python`. Reuse the test as inspiration for bespoke pipelines or as a regression harness when experimenting with search-related utilities. +## Experimental design support + +Managing multi-sample, multi-fraction experiments? The `Py_ExperimentalDesign` +wrapper makes it straightforward to work with OpenMS experimental design files +that describe sample layouts, fractionation schemes, and labeling strategies. +`tests/test_py_experimentaldesign.py` provides comprehensive examples of loading +and querying experimental designs, including support for fractionated workflows, +label-free and labeled quantitation setups, and integration with feature maps, +consensus maps, and identification results. The wrapper exposes Pythonic +properties for quick access to sample counts, fraction information, and design +summaries—perfect for building sample-aware quantitation pipelines or validating +experimental metadata before analysis. 
+ +```python +from openms_python import Py_ExperimentalDesign +import pandas as pd + +# Load an experimental design from a TSV file +design = Py_ExperimentalDesign.from_file("design.tsv") + +# Quick access to design properties +print(f"Samples: {design.n_samples}") +print(f"MS files: {design.n_ms_files}") +print(f"Fractionated: {design.is_fractionated}") + +# Get a summary +design.print_summary() + +# Convert to pandas DataFrame for analysis +df = design.to_dataframe() + +# Create from a pandas DataFrame +df = pd.DataFrame({ + 'Fraction_Group': [1, 1, 2, 2], + 'Fraction': [1, 2, 1, 2], + 'Spectra_Filepath': ['f1.mzML', 'f2.mzML', 'f3.mzML', 'f4.mzML'], + 'Label': [1, 1, 1, 1], + 'Sample': [1, 1, 2, 2] +}) +design = Py_ExperimentalDesign.from_dataframe(df) + +# Store to file +design.store("output_design.tsv") + +# Create from existing OpenMS objects +from openms_python import Py_ConsensusMap +consensus = Py_ConsensusMap.from_file("results.consensusXML") +design = Py_ExperimentalDesign.from_consensus_map(consensus) +``` + ### Iterate over containers and metadata All sequence-like wrappers (feature maps, consensus maps, identification containers, diff --git a/openms_python/__init__.py b/openms_python/__init__.py index 3400c09..750d0d7 100644 --- a/openms_python/__init__.py +++ b/openms_python/__init__.py @@ -26,6 +26,7 @@ from .py_feature import Py_Feature from .py_featuremap import Py_FeatureMap from .py_consensusmap import Py_ConsensusMap +from .py_experimentaldesign import Py_ExperimentalDesign from .py_identifications import ( ProteinIdentifications, PeptideIdentifications, @@ -101,6 +102,7 @@ def get_example(name: str, *, load: bool = False, target_dir: Union[str, Path, N "Py_Feature", "Py_FeatureMap", "Py_ConsensusMap", + "Py_ExperimentalDesign", "ProteinIdentifications", "PeptideIdentifications", "Identifications", diff --git a/openms_python/examples/experimental_design.tsv b/openms_python/examples/experimental_design.tsv new file mode 100644 index 
class Py_ExperimentalDesign:
    """A Pythonic wrapper around :class:`pyopenms.ExperimentalDesign`.

    The wrapper owns one native design object (``self._design``) and adds
    file I/O helpers, read-only summary properties, alternate constructors and
    pandas conversion on top of it. Any attribute not defined here is looked
    up on the native object (see :meth:`__getattr__`).

    Example:
        >>> from openms_python import Py_ExperimentalDesign
        >>> design = Py_ExperimentalDesign.from_file("design.tsv")
        >>> print(f"Samples: {design.n_samples}, MS files: {design.n_ms_files}")
    """

    # Column order shared by :meth:`from_dataframe` and :meth:`to_dataframe`.
    _COLUMNS = ("Fraction_Group", "Fraction", "Spectra_Filepath", "Label", "Sample")

    def __init__(self, native_design: "Optional[oms.ExperimentalDesign]" = None):
        """Initialize with an optional native ExperimentalDesign object.

        Parameters
        ----------
        native_design:
            Optional :class:`pyopenms.ExperimentalDesign` to wrap. When
            ``None`` a fresh, empty native design is created.
        """
        self._design = native_design if native_design is not None else oms.ExperimentalDesign()

    # ==================== File I/O ====================

    @classmethod
    def from_file(cls, filepath: Union[str, Path]) -> "Py_ExperimentalDesign":
        """Load an experimental design from a TSV file.

        Parameters
        ----------
        filepath:
            Path to the experimental design TSV file.

        Returns
        -------
        Py_ExperimentalDesign
            A new instance with the loaded design.

        Example:
            >>> design = Py_ExperimentalDesign.from_file("design.tsv")
        """
        instance = cls()
        instance.load(filepath)
        return instance

    def load(self, filepath: Union[str, Path]) -> "Py_ExperimentalDesign":
        """Load an experimental design from disk, replacing the wrapped design.

        Parameters
        ----------
        filepath:
            Path to the experimental design TSV file. Its suffix is checked
            against ``EXPERIMENTAL_DESIGN_EXTENSIONS`` before loading.

        Returns
        -------
        Py_ExperimentalDesign
            Self for method chaining.
        """
        ensure_allowed_suffix(filepath, EXPERIMENTAL_DESIGN_EXTENSIONS, "ExperimentalDesign")
        edf = oms.ExperimentalDesignFile()
        # NOTE(review): the second positional argument is passed through as
        # False unchanged; confirm its meaning against the pyopenms docs.
        self._design = edf.load(str(filepath), False)
        return self

    def store(self, filepath: Union[str, Path]) -> "Py_ExperimentalDesign":
        """Store the experimental design to disk as a TSV file.

        The design is converted with :meth:`to_dataframe` and written
        tab-separated without an index column, so only the five MS-file
        columns produced by that method end up in the file.

        Parameters
        ----------
        filepath:
            Path where the experimental design should be saved. Its suffix is
            checked against ``EXPERIMENTAL_DESIGN_EXTENSIONS``.

        Returns
        -------
        Py_ExperimentalDesign
            Self for method chaining.

        Example:
            >>> design = Py_ExperimentalDesign.from_file("input.tsv")
            >>> design.store("output.tsv")
        """
        ensure_allowed_suffix(filepath, EXPERIMENTAL_DESIGN_EXTENSIONS, "ExperimentalDesign")
        self.to_dataframe().to_csv(str(filepath), sep="\t", index=False)
        return self

    @property
    def native(self) -> "oms.ExperimentalDesign":
        """Return the underlying :class:`pyopenms.ExperimentalDesign`."""
        return self._design

    # ==================== Properties ====================

    @property
    def n_samples(self) -> int:
        """Number of samples in the experimental design."""
        return self._design.getNumberOfSamples()

    @property
    def n_ms_files(self) -> int:
        """Number of MS files in the experimental design."""
        return self._design.getNumberOfMSFiles()

    @property
    def n_fractions(self) -> int:
        """Number of fractions in the experimental design."""
        return self._design.getNumberOfFractions()

    @property
    def n_fraction_groups(self) -> int:
        """Number of fraction groups in the experimental design."""
        return self._design.getNumberOfFractionGroups()

    @property
    def n_labels(self) -> int:
        """Number of labels in the experimental design."""
        return self._design.getNumberOfLabels()

    @property
    def is_fractionated(self) -> bool:
        """Whether the experimental design includes fractionation."""
        return self._design.isFractionated()

    @property
    def same_n_ms_files_per_fraction(self) -> bool:
        """Whether all fractions have the same number of MS files."""
        return self._design.sameNrOfMSFilesPerFraction()

    @property
    def samples(self) -> Set[str]:
        """Set of sample identifiers in the design.

        Returns
        -------
        Set[str]
            Sample identifiers from the native sample section, with ``bytes``
            values decoded and everything else passed through ``str``.
        """
        sample_section = self._design.getSampleSection()
        return {self._to_text(raw_id) for raw_id in sample_section.getSamples()}

    # ==================== Summary methods ====================

    def summary(self) -> dict:
        """Get a summary of the experimental design.

        Returns
        -------
        dict
            Dictionary with the count properties, ``is_fractionated`` and the
            sorted list of sample identifiers under ``"samples"``.
        """
        return {
            "n_samples": self.n_samples,
            "n_ms_files": self.n_ms_files,
            "n_fractions": self.n_fractions,
            "n_fraction_groups": self.n_fraction_groups,
            "n_labels": self.n_labels,
            "is_fractionated": self.is_fractionated,
            "samples": sorted(self.samples),
        }

    def print_summary(self) -> None:
        """Print a formatted summary of the experimental design to stdout."""
        summary = self.summary()
        print("Experimental Design Summary")
        print("=" * 40)
        print(f"Samples: {summary['n_samples']}")
        print(f"MS Files: {summary['n_ms_files']}")
        print(f"Fractions: {summary['n_fractions']}")
        print(f"Fraction Groups: {summary['n_fraction_groups']}")
        print(f"Labels: {summary['n_labels']}")
        print(f"Fractionated: {summary['is_fractionated']}")
        # The sample line is omitted entirely for a design without samples.
        if summary["samples"]:
            print(f"Sample IDs: {', '.join(summary['samples'])}")

    # ==================== Factory methods ====================

    @classmethod
    def from_consensus_map(
        cls, consensus_map: "Union[Py_ConsensusMap, oms.ConsensusMap]"
    ) -> "Py_ExperimentalDesign":
        """Create an ExperimentalDesign from a ConsensusMap.

        Parameters
        ----------
        consensus_map:
            A :class:`Py_ConsensusMap` or :class:`pyopenms.ConsensusMap`.
            Anything exposing a ``native`` attribute is unwrapped first.

        Returns
        -------
        Py_ExperimentalDesign
            A new instance derived from the consensus map.
        """
        native_map = consensus_map.native if hasattr(consensus_map, "native") else consensus_map
        return cls(oms.ExperimentalDesign.fromConsensusMap(native_map))

    @classmethod
    def from_feature_map(
        cls, feature_map: "Union[Py_FeatureMap, oms.FeatureMap]"
    ) -> "Py_ExperimentalDesign":
        """Create an ExperimentalDesign from a FeatureMap.

        Parameters
        ----------
        feature_map:
            A :class:`Py_FeatureMap` or :class:`pyopenms.FeatureMap`.
            Anything exposing a ``native`` attribute is unwrapped first.

        Returns
        -------
        Py_ExperimentalDesign
            A new instance derived from the feature map.
        """
        native_map = feature_map.native if hasattr(feature_map, "native") else feature_map
        return cls(oms.ExperimentalDesign.fromFeatureMap(native_map))

    @classmethod
    def from_identifications(cls, protein_ids: list) -> "Py_ExperimentalDesign":
        """Create an ExperimentalDesign from protein identification data.

        Parameters
        ----------
        protein_ids:
            List of :class:`pyopenms.ProteinIdentification` objects.

        Returns
        -------
        Py_ExperimentalDesign
            A new instance derived from the identifications.
        """
        return cls(oms.ExperimentalDesign.fromIdentifications(protein_ids))

    @classmethod
    def from_dataframe(cls, df: pd.DataFrame) -> "Py_ExperimentalDesign":
        """Create an ExperimentalDesign from a pandas DataFrame.

        The frame is written to a temporary TSV file and read back through
        :class:`pyopenms.ExperimentalDesignFile`, so the native loader sets up
        the sample section. All columns of ``df`` are written, not only the
        required ones.

        Parameters
        ----------
        df:
            DataFrame with columns: Fraction_Group, Fraction, Spectra_Filepath,
            Label, Sample.

        Returns
        -------
        Py_ExperimentalDesign
            A new instance created from the DataFrame.

        Raises
        ------
        ValueError
            If required columns are missing from the DataFrame.

        Example:
            >>> import pandas as pd
            >>> df = pd.DataFrame({
            ...     'Fraction_Group': [1, 1, 2, 2],
            ...     'Fraction': [1, 2, 1, 2],
            ...     'Spectra_Filepath': ['f1.mzML', 'f2.mzML', 'f3.mzML', 'f4.mzML'],
            ...     'Label': [1, 1, 1, 1],
            ...     'Sample': [1, 1, 2, 2]
            ... })
            >>> design = Py_ExperimentalDesign.from_dataframe(df)
        """
        import tempfile

        required_columns = set(cls._COLUMNS)
        missing = required_columns - set(df.columns)
        if missing:
            missing_str = ", ".join(sorted(missing))
            raise ValueError(
                f"DataFrame is missing required columns: {missing_str}. "
                f"Required columns are: {', '.join(sorted(required_columns))}"
            )

        # A temporary directory is removed on every exit path, including a
        # failing ``to_csv`` or a failing native load, so nothing is left behind.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir) / "design.tsv"
            df.to_csv(temp_path, sep="\t", index=False)
            design = oms.ExperimentalDesignFile().load(str(temp_path), False)
        return cls(design)

    @classmethod
    def from_df(cls, df: pd.DataFrame) -> "Py_ExperimentalDesign":
        """Alias for :meth:`from_dataframe` matching :meth:`get_df`.

        Parameters
        ----------
        df:
            DataFrame with experimental design data.

        Returns
        -------
        Py_ExperimentalDesign
            A new instance created from the DataFrame.
        """
        return cls.from_dataframe(df)

    # ==================== DataFrame export ====================

    @staticmethod
    def _to_text(value) -> str:
        """Decode ``bytes`` to ``str``; pass anything else through ``str``."""
        return value.decode() if isinstance(value, bytes) else str(value)

    @classmethod
    def _coerce_sample_id(cls, raw_id):
        """Return a sample ID as ``int`` when it parses as one, else as ``str``."""
        text = cls._to_text(raw_id)
        try:
            return int(text)
        except ValueError:
            return text

    def to_dataframe(self) -> pd.DataFrame:
        """Convert the ExperimentalDesign to a pandas DataFrame.

        One row is produced per entry of the native MS file section.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: Fraction_Group, Fraction, Spectra_Filepath,
            Label, Sample.

        Example:
            >>> design = Py_ExperimentalDesign.from_file("design.tsv")
            >>> df = design.to_dataframe()
        """
        ms_files = self._design.getMSFileSection()
        sample_section = self._design.getSampleSection()

        # Each MS file entry stores a 0-based sample index; translate it back
        # to the sample ID at that position in the sorted raw ID set.
        # NOTE(review): this assumes the native row order equals the sorted
        # order of the raw IDs. Sorting is lexicographic on the raw values, so
        # "10" sorts before "2" - verify against the OpenMS loader for designs
        # with ten or more numeric samples.
        index_to_sample_id = {
            index: self._coerce_sample_id(raw_id)
            for index, raw_id in enumerate(sorted(sample_section.getSamples()))
        }

        data = {column: [] for column in self._COLUMNS}
        for entry in ms_files:
            path = entry.path
            if isinstance(path, bytes):
                path = path.decode()
            data["Fraction_Group"].append(entry.fraction_group)
            data["Fraction"].append(entry.fraction)
            data["Spectra_Filepath"].append(path)
            data["Label"].append(entry.label)
            # Unknown indices fall back to the raw index value.
            data["Sample"].append(index_to_sample_id.get(entry.sample, entry.sample))

        return pd.DataFrame(data)

    def get_df(self) -> pd.DataFrame:
        """Alias for :meth:`to_dataframe`.

        Returns
        -------
        pd.DataFrame
            DataFrame with experimental design data.
        """
        return self.to_dataframe()

    # ==================== Delegation ====================

    def __getattr__(self, name: str):
        """Delegate unknown attribute access to the underlying ExperimentalDesign.

        Python calls this only after normal lookup has failed.

        Raises
        ------
        AttributeError
            For ``_design`` itself and for dunder names, which are never
            delegated.
        """
        # If ``_design`` is missing (instance built via ``__new__`` alone, or
        # being reconstructed by copy/pickle), reading ``self._design`` below
        # would re-enter this method forever. Dunder probes such as
        # ``__deepcopy__`` or ``__setstate__`` must describe the wrapper, not
        # the wrapped object, so they are not forwarded either.
        if name == "_design" or (name.startswith("__") and name.endswith("__")):
            raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")
        return getattr(self._design, name)

    def __repr__(self) -> str:
        """String representation of the ExperimentalDesign."""
        return (
            f"Py_ExperimentalDesign(samples={self.n_samples}, "
            f"ms_files={self.n_ms_files}, fractionated={self.is_fractionated})"
        )
# Column set every exported design DataFrame is expected to carry.
_DESIGN_COLUMNS = {"Fraction_Group", "Fraction", "Spectra_Filepath", "Label", "Sample"}


def _load_example() -> Py_ExperimentalDesign:
    """Load the bundled example design (2 samples x 3 fractions, 6 MS files)."""
    return Py_ExperimentalDesign.from_file(get_example("experimental_design.tsv"))


def test_py_experimentaldesign_load_from_file():
    """Loading the bundled example yields the counts written in the TSV."""
    design = _load_example()

    assert design.n_samples == 2
    assert design.n_ms_files == 6
    assert design.n_fractions == 3
    assert design.n_fraction_groups == 2
    assert design.n_labels == 1
    assert design.is_fractionated is True


def test_py_experimentaldesign_properties():
    """Every property returns the documented Python type."""
    design = _load_example()

    for name in ("n_samples", "n_ms_files", "n_fractions", "n_fraction_groups", "n_labels"):
        assert isinstance(getattr(design, name), int)
    assert isinstance(design.is_fractionated, bool)
    assert isinstance(design.same_n_ms_files_per_fraction, bool)

    samples = design.samples
    assert isinstance(samples, set)
    assert len(samples) == 2
    assert "1" in samples
    assert "2" in samples


def test_py_experimentaldesign_summary():
    """summary() returns a dict with all expected keys and values."""
    summary = _load_example().summary()

    assert isinstance(summary, dict)
    expected_keys = (
        "n_samples",
        "n_ms_files",
        "n_fractions",
        "n_fraction_groups",
        "n_labels",
        "is_fractionated",
        "samples",
    )
    for key in expected_keys:
        assert key in summary

    assert summary["n_samples"] == 2
    assert summary["n_ms_files"] == 6
    assert summary["samples"] == ["1", "2"]


def test_py_experimentaldesign_print_summary(capsys):
    """print_summary() writes the headline and key figures to stdout."""
    _load_example().print_summary()

    captured = capsys.readouterr()
    assert "Experimental Design Summary" in captured.out
    assert "Samples: 2" in captured.out
    assert "MS Files: 6" in captured.out
    assert "Fractionated: True" in captured.out


def test_py_experimentaldesign_load_method():
    """load() fills an empty wrapper in place and returns it for chaining."""
    design = Py_ExperimentalDesign()

    result = design.load(get_example("experimental_design.tsv"))

    assert result is design
    assert design.n_samples == 2


def test_py_experimentaldesign_native_property():
    """native exposes the wrapped pyopenms object."""
    native = _load_example().native

    assert isinstance(native, oms.ExperimentalDesign)
    assert native.getNumberOfSamples() == 2


def test_py_experimentaldesign_repr():
    """repr() mentions the class name and the headline figures."""
    repr_str = repr(_load_example())

    assert "Py_ExperimentalDesign" in repr_str
    assert "samples=2" in repr_str
    assert "ms_files=6" in repr_str
    assert "fractionated=True" in repr_str


def test_py_experimentaldesign_invalid_extension(tmp_path):
    """Loading a file with an unsupported suffix raises ValueError."""
    wrong_suffix = tmp_path / "design.txt"
    wrong_suffix.write_text("test")

    with pytest.raises(ValueError, match="ExperimentalDesign"):
        Py_ExperimentalDesign.from_file(str(wrong_suffix))


def test_py_experimentaldesign_store_roundtrip(tmp_path):
    """store() followed by from_file() reproduces the same design."""
    design = _load_example()
    target = tmp_path / "roundtrip.tsv"

    result = design.store(str(target))
    assert result is design  # method chaining
    assert target.exists()

    reloaded = Py_ExperimentalDesign.from_file(str(target))
    assert reloaded.n_samples == design.n_samples
    assert reloaded.n_ms_files == design.n_ms_files
    assert reloaded.n_fractions == design.n_fractions
    assert reloaded.n_fraction_groups == design.n_fraction_groups
    assert reloaded.n_labels == design.n_labels

    assert design.to_dataframe().equals(reloaded.to_dataframe())


def test_py_experimentaldesign_store_invalid_extension(tmp_path):
    """store() rejects unsupported file suffixes."""
    design = _load_example()

    with pytest.raises(ValueError, match="ExperimentalDesign"):
        design.store(str(tmp_path / "design.txt"))


def test_py_experimentaldesign_delegation():
    """Unknown attributes are forwarded to the native object."""
    ms_file_section = _load_example().getMSFileSection()

    assert isinstance(ms_file_section, list)
    assert len(ms_file_section) == 6


def test_py_experimentaldesign_simple_design(tmp_path):
    """A two-file, non-fractionated design loads with one fraction."""
    # Each sample gets its own fraction group to avoid conflicts.
    tsv_path = tmp_path / "simple.tsv"
    tsv_path.write_text(
        "Fraction_Group\tFraction\tSpectra_Filepath\tLabel\tSample\n"
        "1\t1\tfile1.mzML\t1\t1\n"
        "2\t1\tfile2.mzML\t1\t2\n"
    )

    design = Py_ExperimentalDesign.from_file(str(tsv_path))

    assert design.n_samples == 2
    assert design.n_ms_files == 2
    assert design.n_fractions == 1
    assert design.n_labels == 1
    assert len(design.samples) == 2


def test_py_experimentaldesign_from_consensus_map():
    """from_consensus_map() accepts a native ConsensusMap."""
    consensus_map = oms.ConsensusMap()
    protein_ids = []
    for identifier in ("file1", "file2"):
        prot_id = oms.ProteinIdentification()
        prot_id.setIdentifier(identifier)
        protein_ids.append(prot_id)
    consensus_map.setProteinIdentifications(protein_ids)

    design = Py_ExperimentalDesign.from_consensus_map(consensus_map)

    assert isinstance(design, Py_ExperimentalDesign)
    assert isinstance(design.native, oms.ExperimentalDesign)


def test_py_experimentaldesign_from_feature_map():
    """from_feature_map() accepts a native FeatureMap."""
    feature_map = oms.FeatureMap()
    prot_id = oms.ProteinIdentification()
    prot_id.setIdentifier("sample1")
    feature_map.setProteinIdentifications([prot_id])

    design = Py_ExperimentalDesign.from_feature_map(feature_map)

    assert isinstance(design, Py_ExperimentalDesign)
    assert isinstance(design.native, oms.ExperimentalDesign)


def test_py_experimentaldesign_from_identifications():
    """from_identifications() accepts a list of ProteinIdentification objects."""
    prot_id = oms.ProteinIdentification()
    prot_id.setIdentifier("search1")

    design = Py_ExperimentalDesign.from_identifications([prot_id])

    assert isinstance(design, Py_ExperimentalDesign)
    assert isinstance(design.native, oms.ExperimentalDesign)


def test_py_experimentaldesign_from_dataframe():
    """from_dataframe() builds a design with the counts implied by the frame."""
    df = pd.DataFrame(
        {
            "Fraction_Group": [1, 1, 2, 2],
            "Fraction": [1, 2, 1, 2],
            "Spectra_Filepath": ["f1.mzML", "f2.mzML", "f3.mzML", "f4.mzML"],
            "Label": [1, 1, 1, 1],
            "Sample": [1, 1, 2, 2],
        }
    )

    design = Py_ExperimentalDesign.from_dataframe(df)

    assert isinstance(design, Py_ExperimentalDesign)
    assert design.n_ms_files == 4
    assert design.n_samples == 2
    assert design.n_fraction_groups == 2


def test_py_experimentaldesign_from_df_alias():
    """from_df() behaves like from_dataframe()."""
    df = pd.DataFrame(
        {
            "Fraction_Group": [1],
            "Fraction": [1],
            "Spectra_Filepath": ["test.mzML"],
            "Label": [1],
            "Sample": [1],
        }
    )

    via_full_name = Py_ExperimentalDesign.from_dataframe(df)
    via_alias = Py_ExperimentalDesign.from_df(df)

    assert via_full_name.n_ms_files == via_alias.n_ms_files


def test_py_experimentaldesign_from_dataframe_missing_columns():
    """from_dataframe() raises ValueError when a required column is absent."""
    # The Sample column is deliberately left out.
    df = pd.DataFrame(
        {
            "Fraction_Group": [1],
            "Fraction": [1],
            "Spectra_Filepath": ["test.mzML"],
            "Label": [1],
        }
    )

    with pytest.raises(ValueError, match="missing required columns"):
        Py_ExperimentalDesign.from_dataframe(df)


def test_py_experimentaldesign_to_dataframe():
    """to_dataframe() yields one row per MS file with the five design columns."""
    df = _load_example().to_dataframe()

    assert isinstance(df, pd.DataFrame)
    assert len(df) == 6
    assert set(df.columns) == _DESIGN_COLUMNS
    assert df["Sample"].nunique() == 2


def test_py_experimentaldesign_get_df_alias():
    """get_df() returns the same frame as to_dataframe()."""
    design = _load_example()

    assert design.to_dataframe().equals(design.get_df())


def test_py_experimentaldesign_dataframe_roundtrip():
    """DataFrame -> design -> DataFrame preserves every column's values."""
    original_df = pd.DataFrame(
        {
            "Fraction_Group": [1, 1, 2],
            "Fraction": [1, 2, 1],
            "Spectra_Filepath": ["a.mzML", "b.mzML", "c.mzML"],
            "Label": [1, 1, 2],
            "Sample": [1, 1, 2],
        }
    )

    result_df = Py_ExperimentalDesign.from_dataframe(original_df).to_dataframe()

    assert len(result_df) == len(original_df)
    for column in ("Fraction_Group", "Fraction", "Spectra_Filepath", "Label", "Sample"):
        assert result_df[column].tolist() == original_df[column].tolist()