From cf2aa475d7319037ee6af0eaed009a025a1ed0ad Mon Sep 17 00:00:00 2001
From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com>
Date: Wed, 5 Nov 2025 14:28:36 +0100
Subject: [PATCH 1/7] correction of reshape_timebin() wav names

---
 src/post_processing/utils/filtering_utils.py | 36 +++++++++++++-------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/src/post_processing/utils/filtering_utils.py b/src/post_processing/utils/filtering_utils.py
index a656855..d10436a 100644
--- a/src/post_processing/utils/filtering_utils.py
+++ b/src/post_processing/utils/filtering_utils.py
@@ -220,6 +220,7 @@ def get_timezone(df: DataFrame):
 
 def reshape_timebin(
     df: DataFrame,
+    timestamp_wav: list[Timestamp],
     timebin_new: Timedelta | None,
     timestamp: list[Timestamp] | None = None,
 ) -> DataFrame:
@@ -232,7 +233,10 @@ def reshape_timebin(
     timebin_new: Timedelta
         The size of the new time bin.
     timestamp: list[Timestamp]
-        A list of Timestamp objects.
+        A list of Timestamp objects corresponding to the shape
+        in which the data should be reshaped.
+    timestamp_wav: list[Timestamp]
+        A list of the start datetime of each wavfile. Length should be the same as df
 
     Returns
     -------
@@ -253,8 +257,10 @@ def reshape_timebin(
     dataset = get_dataset(df)
 
     if isinstance(get_timezone(df), list):
-       df["start_datetime"] = [to_datetime(elem, utc=True) for elem in df["start_datetime"]]
-       df["end_datetime"] = [to_datetime(elem, utc=True) for elem in df["end_datetime"]]
+        df["start_datetime"] = [to_datetime(elem, utc=True)
+                                for elem in df["start_datetime"]]
+        df["end_datetime"] = [to_datetime(elem, utc=True)
+                              for elem in df["end_datetime"]]
 
     results = []
     for ant in annotators:
@@ -280,14 +286,19 @@ def reshape_timebin(
             filenames = df_1annot_1label["filename"].to_list()
 
             # filename_vector
-            filename_vector = [
-                filenames[
-                    bisect.bisect_left(ts_detect_beg, ts) - (ts not in ts_detect_beg)
-                ]
-                if bisect.bisect_left(ts_detect_beg, ts) > 0
-                else filenames[0]
-                for ts in time_vector
-            ]
+            filename_vector = []
+            for ts in time_vector:
+                if (bisect.bisect_left(ts_detect_beg, ts) > 0 and
+                        bisect.bisect_left(ts_detect_beg, ts) != len(ts_detect_beg)):
+                    idx = bisect.bisect_left(ts_detect_beg, ts)
+                    filename_vector.append(
+                        filenames[idx] if timestamp_wav[idx] <= ts else
+                        filenames[idx - 1],
+                    )
+                elif bisect.bisect_left(ts_detect_beg, ts) == len(ts_detect_beg):
+                    filename_vector.append(filenames[-1])
+                else:
+                    filename_vector.append(filenames[0])
 
             # detection vector
             detect_vec = [0] * len(time_vector)
@@ -327,7 +338,8 @@ def reshape_timebin(
                     ),
                 )
 
-    return concat(results).sort_values(by=["start_datetime", "end_datetime", "annotator", "annotation"]).reset_index(drop=True)
+    return concat(results).sort_values(by=["start_datetime", "end_datetime",
+                                           "annotator", "annotation"]).reset_index(drop=True)
 
 
 def ensure_in_list(value: str, candidates: list[str], label: str) -> None:

From ace78e2727695cb93209685efa0319a5191ad251 Mon Sep 17 00:00:00 2001
From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com>
Date: Wed, 5 Nov 2025 17:31:28 +0100
Subject: [PATCH 2/7] changes related to fix of reshape_timebin

---
 src/post_processing/dataclass/detection_filter.py |  2 +-
 tests/conftest.py                                 | 15 ++++++++++-----
 tests/test_DetectionFilters.py                    |  1 +
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/post_processing/dataclass/detection_filter.py b/src/post_processing/dataclass/detection_filter.py
index 4378192..c7938f4 100644
--- a/src/post_processing/dataclass/detection_filter.py
+++ b/src/post_processing/dataclass/detection_filter.py
@@ -42,6 +42,7 @@ class DetectionFilter:
     f_max: float | None = None
     score: float | None = None
     box: bool = False
+    filename_format: str = None
 
     @classmethod
     def from_yaml(
@@ -86,7 +87,6 @@ def from_dict(
         filters = []
         for detection_file, filters_dict in parameters.items():
             df_preview = read_dataframe(Path(detection_file), nrows=5)
-
             filters_dict["timebin_origin"] = Timedelta(
                 max(df_preview["end_time"]),
                 "s",
diff --git a/tests/conftest.py b/tests/conftest.py
index 158c269..0031c29 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,13 +1,16 @@
 import io
 import os
 from pathlib import Path
+from sqlite3.dbapi2 import Timestamp
 
 import numpy as np
 import pytest
 import soundfile as sf
 import yaml
 from osekit.utils.timestamp_utils import strftime_osmose_format
-from pandas import DataFrame, read_csv
+from pandas import DataFrame, read_csv, to_datetime
+
+from post_processing.utils.filtering_utils import get_timezone, read_dataframe
 
 SAMPLE = """dataset,filename,start_time,end_time,start_frequency,end_frequency,annotation,annotator,start_datetime,end_datetime,type,score
 sample_dataset,2025_01_25_06_20_00,0.0,10.0,0.0,72000.0,lbl2,ann2,2025-01-25T06:20:00.000+00:00,2025-01-25T06:20:10.000+00:00,WEAK,0.11
@@ -164,10 +167,11 @@ def sample_csv_timestamp(tmp_path: Path, sample_status: DataFrame) -> Path:
 
 
 @pytest.fixture
-def sample_yaml(tmp_path: Path,
-                           sample_csv_result: Path,
-                           sample_csv_timestamp: Path,
-                           ) -> Path:
+def sample_yaml(
+        tmp_path: Path,
+        sample_csv_result: Path,
+        sample_csv_timestamp: Path,
+) -> Path:
     yaml_content = {
         f"{sample_csv_result}": {
             "timebin_new": None,
@@ -177,6 +181,7 @@ def sample_yaml(tmp_path: Path,
             "annotation": "lbl1",
             "box": True,
             "timestamp_file": f"{sample_csv_timestamp}",
+            "filename_format": "%Y_%m_%d_%H_%M_%S",
             "user_sel": "all",
             "f_min": None,
             "f_max": None,
diff --git a/tests/test_DetectionFilters.py b/tests/test_DetectionFilters.py
index 6b7dd2f..90d80ab 100644
--- a/tests/test_DetectionFilters.py
+++ b/tests/test_DetectionFilters.py
@@ -18,6 +18,7 @@ def test_from_yaml(sample_yaml: Path,
             "annotation": "lbl1",
             "box": True,
             "timestamp_file": f"{sample_csv_timestamp}",
+            "filename_format": "%Y_%m_%d_%H_%M_%S",
             "user_sel": "all",
             "f_min": None,
             "f_max": None,

From 326451c7e3834fdee88fae1c6f24ff71dbd9418d Mon Sep 17 00:00:00 2001
From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com>
Date: Wed, 5 Nov 2025 17:34:41 +0100
Subject: [PATCH 3/7] fix ruff

---
 tests/conftest.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 0031c29..e03bf43 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,16 +1,13 @@
 import io
 import os
 from pathlib import Path
-from sqlite3.dbapi2 import Timestamp
 
 import numpy as np
 import pytest
 import soundfile as sf
 import yaml
 from osekit.utils.timestamp_utils import strftime_osmose_format
-from pandas import DataFrame, read_csv, to_datetime
-
-from post_processing.utils.filtering_utils import get_timezone, read_dataframe
+from pandas import DataFrame, read_csv
 
 SAMPLE = """dataset,filename,start_time,end_time,start_frequency,end_frequency,annotation,annotator,start_datetime,end_datetime,type,score
 sample_dataset,2025_01_25_06_20_00,0.0,10.0,0.0,72000.0,lbl2,ann2,2025-01-25T06:20:00.000+00:00,2025-01-25T06:20:10.000+00:00,WEAK,0.11

From 35a7b9b3dbb14cabacd43d7e7a7ca5bd21283503 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ma=C3=ABlle=20TORTEROTOT?= <maelle.torterotot@ensta.fr>
Date: Fri, 5 Dec 2025 12:20:21 +0100
Subject: [PATCH 4/7] add import

---
 tests/test_filtering_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_filtering_utils.py b/tests/test_filtering_utils.py
index 12eb988..59b03fb 100644
--- a/tests/test_filtering_utils.py
+++ b/tests/test_filtering_utils.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import csv
 from pathlib import Path
 from zoneinfo import ZoneInfo

From 40c861d0272b6b08f14b07d2cb7125c68020c553 Mon Sep 17 00:00:00 2001
From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com>
Date: Tue, 16 Dec 2025 14:59:58 +0100
Subject: [PATCH 5/7] rebase with new upstream

---
 src/post_processing/utils/filtering_utils.py | 111 +++++++++++++++----
 tests/conftest.py                            |   5 +-
 tests/test_filtering_utils.py                |  52 +++++++--
 3 files changed, 133 insertions(+), 35 deletions(-)

diff --git a/src/post_processing/utils/filtering_utils.py b/src/post_processing/utils/filtering_utils.py
index d10436a..036aca5 100644
--- a/src/post_processing/utils/filtering_utils.py
+++ b/src/post_processing/utils/filtering_utils.py
@@ -196,7 +196,7 @@ def get_dataset(df: DataFrame) -> list[str]:
 def get_canonical_tz(tz):
     """Return timezone of object as a pytz timezone."""
     if isinstance(tz, datetime.timezone):
-        if tz == datetime.timezone.utc:
+        if tz == datetime.UTC:
             return pytz.utc
         offset_minutes = int(tz.utcoffset(None).total_seconds() / 60)
         return pytz.FixedOffset(offset_minutes)
@@ -204,13 +204,24 @@ def get_canonical_tz(tz):
         return pytz.timezone(tz.zone)
     if hasattr(tz, "key"):
         return pytz.timezone(tz.key)
-    else:
-        msg = f"Unknown timezone: {tz}"
-        raise TypeError(msg)
+    msg = f"Unknown timezone: {tz}"
+    raise TypeError(msg)
 
 
 def get_timezone(df: DataFrame):
-    """Return timezone(s) from DataFrame."""
+    """Return timezone(s) from APLOSE DataFrame.
+
+    Parameters
+    ----------
+    df: DataFrame
+        APLOSE result Dataframe
+
+    Returns
+    -------
+        tzoffset: list[tzoffset]
+            list of timezones
+
+    """
     timezones = {get_canonical_tz(ts.tzinfo) for ts in df["start_datetime"]}
 
     if len(timezones) == 1:
@@ -218,11 +229,29 @@ def get_timezone(df: DataFrame):
     return list(timezones)
 
 
+def check_timestamp(df: DataFrame, timestamp_audio: list[Timestamp]) -> None:
+    """Check if provided timestamp_audio list is correctly formatted.
+
+    Parameters
+    ----------
+    df: DataFrame APLOSE results Dataframe.
+    timestamp_audio: A list of timestamps. Each timestamp is the start datetime of the
+    corresponding audio file for each detection in df.
+
+    """
+    if timestamp_audio is None:
+        msg = "`timestamp_wav` is empty"
+        raise ValueError(msg)
+    if len(timestamp_audio) != len(df):
+        msg = "`timestamp_wav` is not the same length as `df`"
+        raise ValueError(msg)
+
+
 def reshape_timebin(
     df: DataFrame,
-    timestamp_wav: list[Timestamp],
+    *,
     timebin_new: Timedelta | None,
-    timestamp: list[Timestamp] | None = None,
+    timestamp_audio: list[Timestamp] | None = None,
 ) -> DataFrame:
     """Reshape an APLOSE result DataFrame according to a new time bin.
 
@@ -232,11 +261,9 @@ def reshape_timebin(
         An APLOSE result DataFrame.
     timebin_new: Timedelta
         The size of the new time bin.
-    timestamp: list[Timestamp]
+    timestamp_audio: list[Timestamp]
         A list of Timestamp objects corresponding to the shape
         in which the data should be reshaped.
-    timestamp_wav: list[Timestamp]
-        A list of the start datetime of each wavfile. Length should be the same as df
 
     Returns
     -------
@@ -251,6 +278,8 @@ def reshape_timebin(
     if not timebin_new:
         return df
 
+    check_timestamp(df, timestamp_audio)
+
     annotators = get_annotators(df)
     labels = get_labels(df)
     max_freq = get_max_freq(df)
@@ -258,9 +287,11 @@ def reshape_timebin(
 
     if isinstance(get_timezone(df), list):
         df["start_datetime"] = [to_datetime(elem, utc=True)
-                                for elem in df["start_datetime"]]
+                                for elem in df["start_datetime"]
+                                ]
         df["end_datetime"] = [to_datetime(elem, utc=True)
-                              for elem in df["end_datetime"]]
+                              for elem in df["end_datetime"]
+                              ]
 
     results = []
     for ant in annotators:
@@ -270,13 +301,13 @@ def reshape_timebin(
             if df_1annot_1label.empty:
                 continue
 
-            if timestamp is not None:
+            if timestamp_audio is not None:
                 # I do not remember if this is a regular case or not
                 # might need to be deleted
-                origin_timebin = timestamp[1] - timestamp[0]
-                step = int(timebin_new / origin_timebin)
-                time_vector = timestamp[0::step]
-            else:
+                #origin_timebin = timestamp_audio[1] - timestamp_audio[0]
+                #step = int(timebin_new / origin_timebin)
+                #time_vector = timestamp_audio[0::step]
+            #else:
                 t1 = min(df_1annot_1label["start_datetime"]).floor(timebin_new)
                 t2 = max(df_1annot_1label["end_datetime"]).ceil(timebin_new)
                 time_vector = date_range(start=t1, end=t2, freq=timebin_new)
@@ -292,7 +323,7 @@ def reshape_timebin(
                         bisect.bisect_left(ts_detect_beg, ts) != len(ts_detect_beg)):
                     idx = bisect.bisect_left(ts_detect_beg, ts)
                     filename_vector.append(
-                        filenames[idx] if timestamp_wav[idx] <= ts else
+                        filenames[idx] if timestamp_audio[idx] <= ts else
                         filenames[idx - 1],
                     )
                 elif bisect.bisect_left(ts_detect_beg, ts) == len(ts_detect_beg):
@@ -338,9 +369,39 @@ def reshape_timebin(
                     ),
                 )
 
-    return concat(results).sort_values(by=["start_datetime", "end_datetime",
-                                           "annotator", "annotation"]).reset_index(drop=True)
+    return (concat(results).
+            sort_values(by=["start_datetime", "end_datetime",
+                            "annotator", "annotation"]).reset_index(drop=True)
+            )
+
 
+def get_filename_timestamps(df: DataFrame, date_parser: str) -> list[Timestamp]:
+    """Get start timestamps of the wav files of each detection contained in df.
+
+    Parameters
+    ----------
+    df: DataFrame
+        An APLOSE result DataFrame.
+    date_parser: str
+        date parser of the wav file
+
+    Returns
+    -------
+    List of Timestamps corresponding to the wav files' start timestamps
+    of each detection contained in df.
+
+    """
+    tz = get_timezone(df)
+    try:
+        return [
+        to_datetime(
+            ts,
+            format=date_parser,
+        ).tz_localize(tz) for ts in df["filename"]
+        ]
+    except ValueError:
+        msg = """Could not parse timestamps from `df["filename"]`."""
+        raise ValueError(msg) from None
 
 def ensure_in_list(value: str, candidates: list[str], label: str) -> None:
     """Check for non-valid elements of a list."""
@@ -378,10 +439,14 @@ def load_detections(filters: DetectionFilter) -> DataFrame:
     df = filter_by_label(df, label=filters.annotation)
     df = filter_by_freq(df, filters.f_min, filters.f_max)
     df = filter_by_score(df, filters.score)
-    df = reshape_timebin(df, filters.timebin_new)
+    filename_ts = get_filename_timestamps(df, filters.filename_format)
+    df = reshape_timebin(df,
+                         timebin_new=filters.timebin_new,
+                         timestamp_audio=filename_ts
+                         )
 
     annotators = get_annotators(df)
-    if len(annotators) > 1 and filters.user_sel in ["union", "intersection"]:
+    if len(annotators) > 1 and filters.user_sel in {"union", "intersection"}:
         df = intersection_or_union(df, user_sel=filters.user_sel)
 
     return df.sort_values(by=["start_datetime", "end_datetime"]).reset_index(drop=True)
@@ -397,7 +462,7 @@ def intersection_or_union(df: DataFrame, user_sel: str) -> DataFrame:
     if user_sel == "all":
         return df
 
-    if user_sel not in ("intersection", "union"):
+    if user_sel not in {"intersection", "union"}:
         msg = "'user_sel' must be either 'intersection' or 'union'"
         raise ValueError(msg)
 
diff --git a/tests/conftest.py b/tests/conftest.py
index e03bf43..0031c29 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,13 +1,16 @@
 import io
 import os
 from pathlib import Path
+from sqlite3.dbapi2 import Timestamp
 
 import numpy as np
 import pytest
 import soundfile as sf
 import yaml
 from osekit.utils.timestamp_utils import strftime_osmose_format
-from pandas import DataFrame, read_csv
+from pandas import DataFrame, read_csv, to_datetime
+
+from post_processing.utils.filtering_utils import get_timezone, read_dataframe
 
 SAMPLE = """dataset,filename,start_time,end_time,start_frequency,end_frequency,annotation,annotator,start_datetime,end_datetime,type,score
 sample_dataset,2025_01_25_06_20_00,0.0,10.0,0.0,72000.0,lbl2,ann2,2025-01-25T06:20:00.000+00:00,2025-01-25T06:20:10.000+00:00,WEAK,0.11
diff --git a/tests/test_filtering_utils.py b/tests/test_filtering_utils.py
index 59b03fb..04b32b5 100644
--- a/tests/test_filtering_utils.py
+++ b/tests/test_filtering_utils.py
@@ -298,7 +298,7 @@ def test_get_timezone_single(sample_df: DataFrame) -> None:
 def test_get_timezone_several(sample_df: DataFrame) -> None:
     new_row = {
         "dataset": "dataset",
-        "filename": "filename",
+        "filename": "2025_01_26_06_20_00",
         "start_time": 0,
         "end_time": 2,
         "start_frequency": 100,
@@ -384,7 +384,7 @@ def test_no_timebin_returns_original(sample_df: DataFrame) -> None:
 def test_no_timebin_several_tz(sample_df: DataFrame) -> None:
     new_row = {
         "dataset": "dataset",
-        "filename": "filename",
+        "filename": "2025_01_26_06_20_00",
         "start_time": 0,
         "end_time": 2,
         "start_frequency": 100,
@@ -400,13 +400,24 @@ def test_no_timebin_several_tz(sample_df: DataFrame) -> None:
         [sample_df, DataFrame([new_row])],
         ignore_index=False
     )
-
-    df_out = reshape_timebin(sample_df, timebin_new=None)
+    tz = get_timezone(sample_df)
+    timestamp_wav = to_datetime(sample_df["filename"],
+                                format="%Y_%m_%d_%H_%M_%S").dt.tz_localize(pytz.UTC)
+    df_out = reshape_timebin(sample_df, timestamp_audio=timestamp_wav, timebin_new=None)
     assert df_out.equals(sample_df)
 
 
 def test_no_timebin_original_timebin(sample_df: DataFrame) -> None:
-    df_out = reshape_timebin(sample_df, timebin_new=Timedelta("1min"))
+    tz = get_timezone(sample_df)
+    timestamp_wav = to_datetime(
+        sample_df["filename"],
+        format="%Y_%m_%d_%H_%M_%S"
+    ).dt.tz_localize(tz)
+    df_out = reshape_timebin(
+        sample_df,
+        timestamp_audio=timestamp_wav,
+        timebin_new=Timedelta("1min"),
+    )
     expected = DataFrame(
         {
             "dataset": ["sample_dataset"] * 18,
@@ -488,7 +499,16 @@ def test_no_timebin_original_timebin(sample_df: DataFrame) -> None:
 
 
 def test_simple_reshape_hourly(sample_df: DataFrame) -> None:
-    df_out = reshape_timebin(sample_df, timebin_new=Timedelta(hours=1))
+    tz = get_timezone(sample_df)
+    timestamp_wav = to_datetime(
+        sample_df["filename"],
+        format="%Y_%m_%d_%H_%M_%S"
+    ).dt.tz_localize(tz)
+    df_out = reshape_timebin(
+        sample_df,
+        timestamp_audio=timestamp_wav,
+        timebin_new=Timedelta(hours=1),
+    )
     assert not df_out.empty
     assert all(df_out["end_time"] == 3600.0)
     assert df_out["end_frequency"].max() == sample_df["end_frequency"].max()
@@ -497,7 +517,12 @@ def test_simple_reshape_hourly(sample_df: DataFrame) -> None:
 
 
 def test_reshape_daily_multiple_bins(sample_df: DataFrame) -> None:
-    df_out = reshape_timebin(sample_df, timebin_new=Timedelta(days=1))
+    tz = get_timezone(sample_df)
+    timestamp_wav = to_datetime(
+        sample_df["filename"],
+        format="%Y_%m_%d_%H_%M_%S"
+    ).dt.tz_localize(tz)
+    df_out = reshape_timebin(sample_df, timestamp_audio=timestamp_wav, timebin_new=Timedelta(days=1))
     assert not df_out.empty
     assert all(df_out["end_time"] == 86400.0)
     assert df_out["start_datetime"].min() >= sample_df["start_datetime"].min().floor("D")
@@ -508,11 +533,13 @@ def test_with_manual_timestamps_vector(sample_df: DataFrame) -> None:
     t0 = sample_df["start_datetime"].min().floor("30min")
     t1 = sample_df["end_datetime"].max().ceil("30min")
     ts_vec = list(date_range(t0, t1, freq="30min"))
-
+    tz = get_timezone(sample_df)
+    timestamp_wav = to_datetime(sample_df["filename"],
+                                format="%Y_%m_%d_%H_%M_%S").dt.tz_localize(tz)
     df_out = reshape_timebin(
         sample_df,
-        timebin_new=Timedelta(hours=1),
-        timestamp=ts_vec,
+        timestamp_audio=timestamp_wav,
+        timebin_new=Timedelta(hours=1)
     )
 
     assert not df_out.empty
@@ -521,8 +548,11 @@ def test_with_manual_timestamps_vector(sample_df: DataFrame) -> None:
 
 
 def test_empty_result_when_no_matching(sample_df: DataFrame) -> None:
+    tz = get_timezone(sample_df)
+    timestamp_wav = to_datetime(sample_df["filename"],
+                                format="%Y_%m_%d_%H_%M_%S").dt.tz_localize(tz)
     with pytest.raises(ValueError, match="DataFrame is empty"):
-        reshape_timebin(DataFrame(), Timedelta(hours=1))
+        reshape_timebin(DataFrame(), timestamp_audio=timestamp_wav, timebin_new=Timedelta(hours=1))
 
 
 # %% ensure_no_invalid

From 3b3f89ade618af740ca963c85a6672698af410b5 Mon Sep 17 00:00:00 2001
From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com>
Date: Tue, 16 Dec 2025 15:06:34 +0100
Subject: [PATCH 6/7] fix_ruff

---
 tests/conftest.py             | 5 +----
 tests/test_filtering_utils.py | 2 --
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 0031c29..e03bf43 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,16 +1,13 @@
 import io
 import os
 from pathlib import Path
-from sqlite3.dbapi2 import Timestamp
 
 import numpy as np
 import pytest
 import soundfile as sf
 import yaml
 from osekit.utils.timestamp_utils import strftime_osmose_format
-from pandas import DataFrame, read_csv, to_datetime
-
-from post_processing.utils.filtering_utils import get_timezone, read_dataframe
+from pandas import DataFrame, read_csv
 
 SAMPLE = """dataset,filename,start_time,end_time,start_frequency,end_frequency,annotation,annotator,start_datetime,end_datetime,type,score
 sample_dataset,2025_01_25_06_20_00,0.0,10.0,0.0,72000.0,lbl2,ann2,2025-01-25T06:20:00.000+00:00,2025-01-25T06:20:10.000+00:00,WEAK,0.11
diff --git a/tests/test_filtering_utils.py b/tests/test_filtering_utils.py
index 04b32b5..a12fe67 100644
--- a/tests/test_filtering_utils.py
+++ b/tests/test_filtering_utils.py
@@ -400,7 +400,6 @@ def test_no_timebin_several_tz(sample_df: DataFrame) -> None:
         [sample_df, DataFrame([new_row])],
         ignore_index=False
     )
-    tz = get_timezone(sample_df)
     timestamp_wav = to_datetime(sample_df["filename"],
                                 format="%Y_%m_%d_%H_%M_%S").dt.tz_localize(pytz.UTC)
     df_out = reshape_timebin(sample_df, timestamp_audio=timestamp_wav, timebin_new=None)
@@ -532,7 +531,6 @@ def test_reshape_daily_multiple_bins(sample_df: DataFrame) -> None:
 def test_with_manual_timestamps_vector(sample_df: DataFrame) -> None:
     t0 = sample_df["start_datetime"].min().floor("30min")
     t1 = sample_df["end_datetime"].max().ceil("30min")
-    ts_vec = list(date_range(t0, t1, freq="30min"))
     tz = get_timezone(sample_df)
     timestamp_wav = to_datetime(sample_df["filename"],
                                 format="%Y_%m_%d_%H_%M_%S").dt.tz_localize(tz)

From 233c5a635c5276fe6e868fbe9c6c5f193329b478 Mon Sep 17 00:00:00 2001
From: MaelleTtrt <48910568+MaelleTtrt@users.noreply.github.com>
Date: Tue, 16 Dec 2025 15:08:41 +0100
Subject: [PATCH 7/7] fix_ruff2

---
 tests/test_filtering_utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test_filtering_utils.py b/tests/test_filtering_utils.py
index a12fe67..68b8d20 100644
--- a/tests/test_filtering_utils.py
+++ b/tests/test_filtering_utils.py
@@ -6,7 +6,7 @@
 
 import pytest
 import pytz
-from pandas import DataFrame, Timedelta, Timestamp, date_range, concat, to_datetime
+from pandas import DataFrame, Timedelta, Timestamp, concat, to_datetime
 
 from post_processing.utils.filtering_utils import (
     filter_by_annotator,
@@ -529,8 +529,7 @@ def test_reshape_daily_multiple_bins(sample_df: DataFrame) -> None:
 
 
 def test_with_manual_timestamps_vector(sample_df: DataFrame) -> None:
-    t0 = sample_df["start_datetime"].min().floor("30min")
-    t1 = sample_df["end_datetime"].max().ceil("30min")
+
     tz = get_timezone(sample_df)
     timestamp_wav = to_datetime(sample_df["filename"],
                                 format="%Y_%m_%d_%H_%M_%S").dt.tz_localize(tz)