1 change: 0 additions & 1 deletion src/post_processing/dataclass/data_aplose.py
@@ -393,7 +393,6 @@ def plot(
color = kwargs.get("color")
season = kwargs.get("season")
effort = kwargs.get("effort")

if not bin_size:
msg = "'bin_size' missing for histogram plot."
raise ValueError(msg)
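A minimal sketch of the keyword-validation pattern this hunk touches, with the body reduced to the relevant checks (illustrative, not the full plot() implementation):

def plot(**kwargs) -> None:
    # Optional options are read with .get(), so absent keys yield None.
    color = kwargs.get("color")
    season = kwargs.get("season")
    effort = kwargs.get("effort")
    bin_size = kwargs.get("bin_size")
    # Fail fast with a precise message instead of a later TypeError.
    if not bin_size:
        msg = "'bin_size' missing for histogram plot."
        raise ValueError(msg)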
8 changes: 7 additions & 1 deletion src/post_processing/dataclass/detection_filter.py
@@ -7,7 +7,7 @@

from __future__ import annotations

from dataclasses import dataclass
from dataclasses import dataclass, fields
from pathlib import Path
from typing import TYPE_CHECKING, Literal

@@ -44,6 +44,12 @@ class DetectionFilter:
box: bool = False
filename_format: str | None = None

def __getitem__(self, key: str):
"""Return the value of the given key."""
if key in {f.name for f in fields(self)}:
return getattr(self, key)
raise KeyError(key)

@classmethod
def from_yaml(
cls,
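The new __getitem__ gives the dataclass dict-style read access limited to its declared fields. A self-contained sketch of the same pattern (the stand-in class and values are illustrative, not the full DetectionFilter):

from dataclasses import dataclass, fields

@dataclass(frozen=True)
class Config:  # stand-in for DetectionFilter
    box: bool = False
    filename_format: str | None = None

    def __getitem__(self, key: str):
        """Return the value of the given key."""
        if key in {f.name for f in fields(self)}:
            return getattr(self, key)
        raise KeyError(key)

cfg = Config(box=True)
assert cfg["box"] is True
# cfg["unknown"] raises KeyError: 'unknown'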
153 changes: 120 additions & 33 deletions src/post_processing/dataclass/recording_period.py
@@ -8,19 +8,15 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING

from osekit.config import TIMESTAMP_FORMATS_EXPORTED_FILES
from osekit.utils.timestamp_utils import strptime_from_text
from pandas import (
Series,
Timedelta,
cut,
date_range,
interval_range,
read_csv,
to_datetime,
)

from post_processing.utils.core_utils import (
get_time_range_and_bin_size,
localize_timestamps,
)
from post_processing.utils.filtering_utils import (
find_delimiter,
)
@@ -33,7 +29,7 @@

@dataclass(frozen=True)
class RecordingPeriod:
"""A class to handle recording periods."""
"""Represents recording effort over time, aggregated into bins."""

counts: Series
timebin_origin: Timedelta
@@ -42,33 +38,124 @@ def from_path(
def from_path(
cls,
config: DetectionFilter,
date_format: str = TIMESTAMP_FORMATS_EXPORTED_FILES,
*,
bin_size: Timedelta | BaseOffset,
) -> RecordingPeriod:
"""Return a list of Timestamps corresponding to recording periods."""
"""Vectorized creation of recording coverage from CSV with start/end datetimes.

This method reads a CSV with columns:
- "start_recording"
- "end_recording"
- "start_deployment"
- "end_deployment"

It computes the **effective recording interval** as the intersection between
recording and deployment periods, builds a fine-grained timeline at
`timebin_origin` resolution, and aggregates effort into `bin_size` bins.

Parameters
----------
config : DetectionFilter
Configuration object containing at least:
- `timestamp_file`: path to CSV
- `timebin_origin`: Timedelta resolution of detections
bin_size : Timedelta or BaseOffset
Size of the aggregation bin (e.g. `Timedelta("1h")` or a calendar offset such as `MonthBegin()`).

Returns
-------
RecordingPeriod
Object containing `counts` (Series indexed by IntervalIndex) and
`timebin_origin`.

"""
# Read CSV and parse datetime columns
timestamp_file = config.timestamp_file
delim = find_delimiter(timestamp_file)
timestamp_df = read_csv(timestamp_file, delimiter=delim)

if "timestamp" in timestamp_df.columns:
msg = "Parsing 'timestamp' column not implemented yet."
raise NotImplementedError(msg)

if "filename" in timestamp_df.columns:
timestamps = [
strptime_from_text(ts, date_format)
for ts in timestamp_df["filename"]
]
timestamps = localize_timestamps(timestamps, config.timezone)
time_vector, bin_size = get_time_range_and_bin_size(timestamps, bin_size)

binned = cut(timestamps, time_vector)
max_annot = bin_size / config.timebin_origin

return cls(counts=binned.value_counts().sort_index().clip(upper=max_annot),
timebin_origin=config.timebin_origin,
)

msg = "Could not parse timestamps."
raise ValueError(msg)
df = read_csv(
config.timestamp_file,
parse_dates=[
"start_recording",
"end_recording",
"start_deployment",
"end_deployment",
],
delimiter=delim,
)

if df.empty:
msg = "CSV is empty."
raise ValueError(msg)

# Ensure all required columns are present
required_columns = {
"start_recording",
"end_recording",
"start_deployment",
"end_deployment",
}

missing = required_columns - set(df.columns)

if missing:
msg = f"CSV is missing required columns: {', '.join(sorted(missing))}"
raise ValueError(msg)

# Normalize timezones: convert to UTC, then remove tz info (naive)
for col in [
"start_recording",
"end_recording",
"start_deployment",
"end_deployment",
]:
df[col] = to_datetime(df[col], utc=True).dt.tz_convert(None)

# Compute effective recording intervals (intersection)
df["effective_start_recording"] = df[
["start_recording", "start_deployment"]
].max(axis=1)

df["effective_end_recording"] = df[
["end_recording", "end_deployment"]
].min(axis=1)

# Remove rows with no actual recording interval
df = df.loc[df["effective_start_recording"] < df["effective_end_recording"]].copy()

if df.empty:
msg = "No valid recording intervals after deployment intersection."
raise ValueError(msg)

# Build fine-grained timeline at `timebin_origin` resolution
origin = config.timebin_origin
time_index = date_range(
start=df["effective_start_recording"].min(),
end=df["effective_end_recording"].max(),
freq=origin,
)

# Initialize effort vector (0 = no recording, 1 = recording)
# Compare each timestamp to all intervals in a vectorized manner
effort = Series(0, index=time_index)

# Vectorized interval coverage
t_vals = time_index.to_numpy()[:, None]
start_vals = df["effective_start_recording"].to_numpy()
end_vals = df["effective_end_recording"].to_numpy()

# Boolean matrix: covered[i, j] is True if timestamp i falls within interval j
covered = (t_vals >= start_vals) & (t_vals < end_vals)
effort[:] = covered.any(axis=1).astype(int)

# Aggregate effort into user-defined bin_size
counts = effort.resample(bin_size).sum()

# Replace index with IntervalIndex for downstream compatibility
counts.index = interval_range(
start=counts.index[0],
periods=len(counts),
freq=bin_size,
closed="left",
)

return cls(counts=counts, timebin_origin=origin)
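A runnable sketch of the pipeline from_path now implements: intersect recording and deployment periods, mark coverage on a fine timeline by broadcasting, then aggregate into bins. Column names follow the diff; the sample data and the 10-minute/1-hour resolutions are illustrative:

import pandas as pd

df = pd.DataFrame({
    "start_recording": pd.to_datetime(["2024-01-01 00:00", "2024-01-01 02:00"]),
    "end_recording": pd.to_datetime(["2024-01-01 01:30", "2024-01-01 03:00"]),
    "start_deployment": pd.to_datetime(["2024-01-01 00:30", "2024-01-01 00:30"]),
    "end_deployment": pd.to_datetime(["2024-01-01 02:30", "2024-01-01 02:30"]),
})

# Effective interval = intersection of recording and deployment periods.
df["effective_start"] = df[["start_recording", "start_deployment"]].max(axis=1)
df["effective_end"] = df[["end_recording", "end_deployment"]].min(axis=1)
df = df.loc[df["effective_start"] < df["effective_end"]]

# Fine-grained timeline at the detection resolution (here 10 minutes).
idx = pd.date_range(df["effective_start"].min(), df["effective_end"].max(), freq="10min")

# Broadcast timestamps (n, 1) against intervals (m,) -> (n, m) boolean matrix.
t_vals = idx.to_numpy()[:, None]
covered = (t_vals >= df["effective_start"].to_numpy()) & (t_vals < df["effective_end"].to_numpy())
effort = pd.Series(covered.any(axis=1).astype(int), index=idx)

# Aggregate into 1-hour bins (at most 6 covered 10-minute slots per bin)
# and expose the result on a left-closed IntervalIndex.
counts = effort.resample("1h").sum()
counts.index = pd.interval_range(start=counts.index[0], periods=len(counts), freq="1h", closed="left")
print(counts)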
10 changes: 4 additions & 6 deletions src/post_processing/utils/core_utils.py
@@ -11,7 +11,7 @@
from astral.sun import sunrise, sunset
from matplotlib import pyplot as plt
from osekit.config import TIMESTAMP_FORMAT_AUDIO_FILE
from osekit.utils.timestamp_utils import strptime_from_text, strftime_osmose_format
from osekit.utils.timestamp_utils import strftime_osmose_format, strptime_from_text
from pandas import (
DataFrame,
DatetimeIndex,
@@ -255,7 +255,6 @@ def add_weak_detection(
new_line.append(np.nan)
df.loc[df.index.max() + 1] = new_line


return df.sort_values(by=["start_datetime", "annotator"]).reset_index(drop=True)


@@ -509,11 +508,10 @@ def get_time_range_and_bin_size(

if isinstance(bin_size, Timedelta):
return timestamp_range, bin_size
elif isinstance(bin_size, BaseOffset):
if isinstance(bin_size, BaseOffset):
return timestamp_range, timestamp_range[1] - timestamp_range[0]
else:
msg = "bin_size must be a Timedelta or BaseOffset."
raise TypeError(msg)
msg = "bin_size must be a Timedelta or BaseOffset."
raise TypeError(msg)


def round_begin_end_timestamps(
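The restructured branches above behave identically to the old if/elif/else; the dispatch they implement is: fixed-width Timedelta bins keep their size, while calendar offsets take the bin size from the generated range. A sketch (the helper name and example values are illustrative):

from pandas import Timedelta, date_range
from pandas.tseries.offsets import BaseOffset, MonthBegin

def bin_width(timestamp_range, bin_size):
    if isinstance(bin_size, Timedelta):
        return bin_size  # fixed-width bin: the size is the Timedelta itself
    if isinstance(bin_size, BaseOffset):
        # Calendar offsets (month, year) have no fixed width; derive it
        # from the first step of the generated range instead.
        return timestamp_range[1] - timestamp_range[0]
    msg = "bin_size must be a Timedelta or BaseOffset."
    raise TypeError(msg)

rng = date_range("2024-01-01", periods=3, freq=MonthBegin())
print(bin_width(rng, MonthBegin()))  # 31 days: the width of January 2024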
26 changes: 14 additions & 12 deletions src/post_processing/utils/filtering_utils.py
@@ -8,6 +8,7 @@
from typing import TYPE_CHECKING

import pytz
from osekit.utils.timestamp_utils import strptime_from_text
from pandas import (
DataFrame,
Timedelta,
@@ -509,8 +510,8 @@ def reshape_timebin(
timebin_new: Timedelta
The size of the new time bin.
timestamp_audio: list[Timestamp]
A list of Timestamp objects corresponding to the shape
in which the data should be reshaped.
A list of Timestamp objects giving the start time of each WAV file
that contains a detection.

Returns
-------
@@ -570,16 +571,17 @@ def get_filename_timestamps(df: DataFrame, date_parser: str) -> list[Timestamp]:

"""
tz = get_timezone(df)
try:
return [
to_datetime(
ts,
format=date_parser,
).tz_localize(tz) for ts in df["filename"]
]
except ValueError:
msg = """Could not parse timestamps from `df["filename"]`."""
raise ValueError(msg) from None
timestamps = [
strptime_from_text(
ts,
datetime_template=date_parser,
) for ts in df["filename"]
]

if all(t.tz is None for t in timestamps):
timestamps = [t.tz_localize(tz) for t in timestamps]

return timestamps


def ensure_in_list(value: str, candidates: list[str], label: str) -> None:
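Since strptime_from_text may yield tz-aware timestamps when the template itself encodes a timezone, the new guard localizes only when every parsed timestamp is still naive. A sketch of that rule with plain pandas objects (values are illustrative):

import pytz
from pandas import Timestamp

timestamps = [Timestamp("2024-06-01 12:00:00"), Timestamp("2024-06-02 12:00:00")]
tz = pytz.timezone("Europe/Paris")

# Localize only if no timestamp already carries tz info, so tz-aware
# filenames are never localized twice.
if all(t.tz is None for t in timestamps):
    timestamps = [t.tz_localize(tz) for t in timestamps]

print(timestamps[0])  # 2024-06-01 12:00:00+02:00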