From 4eb26c35d72ec48236ce3788a0cff2675bdab472 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 10 Mar 2026 16:58:03 -0600
Subject: [PATCH 1/4] Add xclust sorting extractor

---
 .gitignore                                    |   1 +
 .../extractors/extractor_classes.py           |   2 +
 .../extractors/tests/test_xclustextractors.py |  39 +++++
 .../extractors/xclustextractors.py            | 162 ++++++++++++++++++
 4 files changed, 204 insertions(+)
 create mode 100644 src/spikeinterface/extractors/tests/test_xclustextractors.py
 create mode 100644 src/spikeinterface/extractors/xclustextractors.py

diff --git a/.gitignore b/.gitignore
index e3024af0dd..2baa4b4f92 100644
--- a/.gitignore
+++ b/.gitignore
@@ -146,3 +146,4 @@ test_folder/
 # Mac OS
 .DS_Store
 test_data.json
+uv.lock
diff --git a/src/spikeinterface/extractors/extractor_classes.py b/src/spikeinterface/extractors/extractor_classes.py
index 653dfe9b1b..7a44985f33 100644
--- a/src/spikeinterface/extractors/extractor_classes.py
+++ b/src/spikeinterface/extractors/extractor_classes.py
@@ -44,6 +44,7 @@
 from .klustaextractors import KlustaSortingExtractor, read_klusta
 from .hdsortextractors import HDSortSortingExtractor, read_hdsort
 from .mclustextractors import MClustSortingExtractor, read_mclust
+from .xclustextractors import XClustSortingExtractor, read_xclust
 from .waveclustextractors import WaveClusSortingExtractor, read_waveclus
 from .yassextractors import YassSortingExtractor, read_yass
 from .combinatoextractors import CombinatoSortingExtractor, read_combinato
@@ -131,6 +132,7 @@
     KlustaSortingExtractor: dict(wrapper_string="read_klusta", wrapper_class=read_klusta),
     HDSortSortingExtractor: dict(wrapper_string="read_hdsort", wrapper_class=read_hdsort),
     MClustSortingExtractor: dict(wrapper_string="read_mclust", wrapper_class=read_mclust),
+    XClustSortingExtractor: dict(wrapper_string="read_xclust", wrapper_class=read_xclust),
     WaveClusSortingExtractor: dict(wrapper_string="read_waveclus", wrapper_class=read_waveclus),
     YassSortingExtractor: dict(wrapper_string="read_yass", wrapper_class=read_yass),
     CombinatoSortingExtractor: dict(wrapper_string="read_combinato", wrapper_class=read_combinato),
diff --git a/src/spikeinterface/extractors/tests/test_xclustextractors.py b/src/spikeinterface/extractors/tests/test_xclustextractors.py
new file mode 100644
index 0000000000..818c854948
--- /dev/null
+++ b/src/spikeinterface/extractors/tests/test_xclustextractors.py
@@ -0,0 +1,39 @@
+import tempfile
+import unittest
+
+import pytest
+
+from spikeinterface.extractors.tests.common_tests import SortingCommonTestSuite, local_folder
+from spikeinterface.extractors.xclustextractors import XClustSortingExtractor
+
+
+class XClustSortingTest(SortingCommonTestSuite, unittest.TestCase):
+    ExtractorClass = XClustSortingExtractor
+    downloads = ["xclust"]
+    entities = [
+        dict(folder_path=local_folder / "xclust" / "TT2", sampling_frequency=30_000.0),
+        dict(folder_path=local_folder / "xclust" / "TT6", sampling_frequency=30_000.0),
+        dict(
+            file_path_list=sorted((local_folder / "xclust" / "TT6").glob("*.CEL")),
+            sampling_frequency=30_000.0,
+        ),
+    ]
+
+
+class TestXClustErrors(unittest.TestCase):
+    def test_both_args_raises(self):
+        with pytest.raises(ValueError, match="not both"):
+            XClustSortingExtractor(
+                folder_path="/some/path",
+                file_path_list=["/some/file.CEL"],
+                sampling_frequency=30_000.0,
+            )
+
+    def test_neither_arg_raises(self):
+        with pytest.raises(ValueError, match="Provide one of"):
+            XClustSortingExtractor(sampling_frequency=30_000.0)
+
+    def test_empty_folder_raises(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            with pytest.raises(ValueError, match="No .CEL files"):
+                XClustSortingExtractor(folder_path=tmp_dir, sampling_frequency=30_000.0)
diff --git a/src/spikeinterface/extractors/xclustextractors.py b/src/spikeinterface/extractors/xclustextractors.py
new file mode 100644
index 0000000000..e376fd54b0
--- /dev/null
+++ b/src/spikeinterface/extractors/xclustextractors.py
@@ -0,0 +1,162 @@
+from pathlib import Path
+
+import numpy as np
+
+from spikeinterface.core import BaseSorting, BaseSortingSegment
+from spikeinterface.core.core_tools import define_function_from_class
+
+
+class XClustSortingExtractor(BaseSorting):
+    """Load XClust sorting solution as a sorting extractor.
+
+    XClust is a legacy spike sorting tool from the McNaughton lab. Each `.CEL`
+    file is ASCII with a header (``%%BEGINHEADER`` / ``%%ENDHEADER``) followed
+    by whitespace-separated tabular data containing spike times.
+
+    Parameters
+    ----------
+    folder_path : str or Path or None, default: None
+        Path to folder containing `.CEL` files. Mutually exclusive with
+        ``file_path_list``.
+    file_path_list : list of str or Path or None, default: None
+        Explicit list of `.CEL` file paths. Mutually exclusive with
+        ``folder_path``.
+    sampling_frequency : float
+        Sampling frequency in Hz.
+
+    Returns
+    -------
+    extractor : XClustSortingExtractor
+        Loaded data.
+    """
+
+    def __init__(self, folder_path=None, *, file_path_list=None, sampling_frequency):
+        if folder_path is not None and file_path_list is not None:
+            raise ValueError("Provide either 'folder_path' or 'file_path_list', not both.")
+        if folder_path is None and file_path_list is None:
+            raise ValueError("Provide one of 'folder_path' or 'file_path_list'.")
+
+        if folder_path is not None:
+            folder_path = Path(folder_path)
+            cel_files = sorted(folder_path.glob("*.CEL"))
+            if len(cel_files) == 0:
+                raise ValueError(f"No .CEL files found in {folder_path}")
+        else:
+            cel_files = [Path(f) for f in file_path_list]
+
+        unit_ids = []
+        unit_names = []
+        cluster_ids = []
+        spike_times_dict = {}
+
+        for cel_file in cel_files:
+            cluster_id, spike_times_seconds = XClustSortingExtractor._parse_cel_file(cel_file)
+            # XClust filenames follow the pattern {session_type}~{cluster_number}.CEL
+            # e.g. BL1~1.CEL, ESA23D~2.CEL. We split on "~" to build clean identifiers.
+            file_stem = cel_file.stem
+            session_type, cluster_number = file_stem.split("~")
+            # unit_id: unique identifier with "~" replaced by "_" to avoid path/query issues
+            unit_id = f"{session_type}_{cluster_number}"
+            # unit_name: human-readable label clarifying that the number refers to a cluster
+            unit_name = f"{session_type}_cluster_{cluster_number}"
+            unit_ids.append(unit_id)
+            unit_names.append(unit_name)
+            cluster_ids.append(cluster_id)
+            spike_times_dict[unit_id] = np.unique(spike_times_seconds)
+
+        BaseSorting.__init__(self, sampling_frequency=sampling_frequency, unit_ids=unit_ids)
+
+        self.add_sorting_segment(XClustSortingSegment(spike_times_dict, sampling_frequency))
+        self.set_property("unit_name", np.array(unit_names))
+        # cluster_id is not unique across session types (e.g. BL1~1 and ESA23D~1 both have
+        # cluster_id "1"), so it is stored as a property for provenance rather than as unit_id.
+        self.set_property("cluster_id", np.array(cluster_ids))
+
+        self._kwargs = {
+            "folder_path": str(Path(folder_path).absolute()) if folder_path is not None else None,
+            "file_path_list": [str(Path(f).absolute()) for f in file_path_list] if file_path_list is not None else None,
+            "sampling_frequency": sampling_frequency,
+        }
+
+    @staticmethod
+    def _parse_cel_file(file_path):
+        """Parse an XClust .CEL file and return the cluster ID and spike times in seconds.
+
+        Parameters
+        ----------
+        file_path : str or Path
+            Path to a `.CEL` file.
+
+        Returns
+        -------
+        cluster_id : int
+            The cluster ID from the header.
+        spike_times : numpy.ndarray
+            1-D array of spike times in seconds.
+        """
+        file_path = Path(file_path)
+        cluster_id = None
+        fields = None
+
+        with open(file_path, "r") as f:
+            in_header = False
+            for line in f:
+                stripped = line.strip()
+                if stripped == "%%BEGINHEADER":
+                    in_header = True
+                    continue
+                if stripped == "%%ENDHEADER":
+                    break
+                if in_header:
+                    if stripped.startswith("% Cluster:"):
+                        cluster_id = stripped.split(":")[-1].strip()
+                    elif stripped.startswith("% Fields:"):
+                        fields = stripped.split(":")[-1].strip().split()
+
+        if cluster_id is None:
+            raise ValueError(f"No 'Cluster' field found in header of {file_path}")
+        if fields is None:
+            raise ValueError(f"No 'Fields' line found in header of {file_path}")
+        if "time" not in fields:
+            raise ValueError(f"No 'time' field found in Fields of {file_path}")
+
+        time_column_index = fields.index("time")
+
+        data = np.loadtxt(file_path, comments="%")
+        spike_times = data[:, time_column_index] if data.ndim == 2 else np.array([data[time_column_index]])
+
+        return cluster_id, spike_times
+
+
+class XClustSortingSegment(BaseSortingSegment):
+    def __init__(self, spike_times_dict, sampling_frequency):
+        BaseSortingSegment.__init__(self)
+        self._spike_times_dict = spike_times_dict
+        self._sampling_frequency = sampling_frequency
+
+    def get_unit_spike_train(self, unit_id, start_frame, end_frame):
+        start_time = None if start_frame is None else start_frame / self._sampling_frequency
+        end_time = None if end_frame is None else end_frame / self._sampling_frequency
+
+        spike_times = self.get_unit_spike_train_in_seconds(unit_id=unit_id, start_time=start_time, end_time=end_time)
+        frames = np.round(spike_times * self._sampling_frequency).astype("int64", copy=False)
+        return np.unique(frames)
+
+    def get_unit_spike_train_in_seconds(self, unit_id, start_time=None, end_time=None):
+        # XClust .CEL files store spike times natively in seconds
+        spike_times = self._spike_times_dict[unit_id]
+
+        if start_time is not None:
+            start_index = np.searchsorted(spike_times, start_time, side="left")
+        else:
+            start_index = 0
+
+        if end_time is not None:
+            end_index = np.searchsorted(spike_times, end_time, side="left")
+        else:
+            end_index = spike_times.size
+
+        return spike_times[start_index:end_index]
+
+
+read_xclust = define_function_from_class(source_class=XClustSortingExtractor, name="read_xclust")

From 36521a90b18b3c08328b8c538fe70a49fa4e6b01 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 10 Mar 2026 17:09:45 -0600
Subject: [PATCH 2/4] bette errors

---
 .../extractors/tests/test_xclustextractors.py               | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/spikeinterface/extractors/tests/test_xclustextractors.py b/src/spikeinterface/extractors/tests/test_xclustextractors.py
index 818c854948..e9fba4da70 100644
--- a/src/spikeinterface/extractors/tests/test_xclustextractors.py
+++ b/src/spikeinterface/extractors/tests/test_xclustextractors.py
@@ -22,7 +22,7 @@ class XClustSortingTest(SortingCommonTestSuite, unittest.TestCase):
 
 class TestXClustErrors(unittest.TestCase):
     def test_both_args_raises(self):
-        with pytest.raises(ValueError, match="not both"):
+        with pytest.raises(ValueError, match="Provide either 'folder_path' or 'file_path_list', not both."):
             XClustSortingExtractor(
                 folder_path="/some/path",
                 file_path_list=["/some/file.CEL"],
@@ -30,10 +30,10 @@ def test_both_args_raises(self):
             )
 
     def test_neither_arg_raises(self):
-        with pytest.raises(ValueError, match="Provide one of"):
+        with pytest.raises(ValueError, match="Provide one of 'folder_path' or 'file_path_list'."):
             XClustSortingExtractor(sampling_frequency=30_000.0)
 
     def test_empty_folder_raises(self):
         with tempfile.TemporaryDirectory() as tmp_dir:
-            with pytest.raises(ValueError, match="No .CEL files"):
+            with pytest.raises(ValueError, match=f"No .CEL files found in {tmp_dir}"):
                 XClustSortingExtractor(folder_path=tmp_dir, sampling_frequency=30_000.0)

From f5c7d5cedc6c5931b5bbf66c39f0e5c909d63d7e Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 10 Mar 2026 17:47:24 -0600
Subject: [PATCH 3/4] add type hints

---
 src/spikeinterface/extractors/xclustextractors.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/spikeinterface/extractors/xclustextractors.py b/src/spikeinterface/extractors/xclustextractors.py
index e376fd54b0..44f95f5623 100644
--- a/src/spikeinterface/extractors/xclustextractors.py
+++ b/src/spikeinterface/extractors/xclustextractors.py
@@ -30,7 +30,13 @@ class XClustSortingExtractor(BaseSorting):
         Loaded data.
     """
 
-    def __init__(self, folder_path=None, *, file_path_list=None, sampling_frequency):
+    def __init__(
+        self,
+        folder_path: str | Path | None = None,
+        *,
+        file_path_list: list[str | Path] | None = None,
+        sampling_frequency: float,
+    ):
         if folder_path is not None and file_path_list is not None:
             raise ValueError("Provide either 'folder_path' or 'file_path_list', not both.")
         if folder_path is None and file_path_list is None:

From 275f696547b1f7f5978658e5147e36cf7e202753 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin <h.mayorquin@gmail.com>
Date: Tue, 10 Mar 2026 18:06:42 -0600
Subject: [PATCH 4/4] fix windows tests

---
 src/spikeinterface/extractors/tests/test_xclustextractors.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/spikeinterface/extractors/tests/test_xclustextractors.py b/src/spikeinterface/extractors/tests/test_xclustextractors.py
index e9fba4da70..88446af5f1 100644
--- a/src/spikeinterface/extractors/tests/test_xclustextractors.py
+++ b/src/spikeinterface/extractors/tests/test_xclustextractors.py
@@ -1,3 +1,4 @@
+import re
 import tempfile
 import unittest
 
@@ -35,5 +36,5 @@ def test_neither_arg_raises(self):
 
     def test_empty_folder_raises(self):
         with tempfile.TemporaryDirectory() as tmp_dir:
-            with pytest.raises(ValueError, match=f"No .CEL files found in {tmp_dir}"):
+            with pytest.raises(ValueError, match=re.escape(f"No .CEL files found in {tmp_dir}")):
                 XClustSortingExtractor(folder_path=tmp_dir, sampling_frequency=30_000.0)