Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions imap_processing/glows/l2/glows_l2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Module for GLOWS Level 2 processing."""

import dataclasses
import logging

import numpy as np
import xarray as xr
Expand All @@ -13,6 +14,8 @@
from imap_processing.glows.l2.glows_l2_data import HistogramL2
from imap_processing.spice.time import et_to_datetime64, ttj2000ns_to_et

logger = logging.getLogger(__name__)


def glows_l2(
input_dataset: xr.Dataset,
Expand Down Expand Up @@ -46,9 +49,12 @@ def glows_l2(
pipeline_settings_dataset.sel(epoch=day, method="nearest")
)

l2 = HistogramL2(input_dataset, pipeline_settings)

return [create_l2_dataset(l2, cdf_attrs)]
l2 = HistogramL2.create(input_dataset, pipeline_settings)
if l2 is None:
logger.warning("No good data found in L1B dataset. Returning empty list.")
return []
else:
return [create_l2_dataset(l2, cdf_attrs)]


def create_l2_dataset(
Expand Down
121 changes: 70 additions & 51 deletions imap_processing/glows/l2/glows_l2_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Module containing the class definition for the HistogramL2 class."""

from __future__ import annotations

from dataclasses import InitVar, dataclass, field

import numpy as np
Expand Down Expand Up @@ -123,10 +125,8 @@ class HistogramL2:

Parameters
----------
l1b_dataset : xr.Dataset
GLOWS histogram L1B dataset, as produced by glows_l1b.py.
pipeline_settings : PipelineSettings
Pipeline settings object read from ancillary file.
good_l1b_data : xr.Dataset
GLOWS histogram L1B dataset filtered by good times.

Attributes
----------
Expand Down Expand Up @@ -210,123 +210,142 @@ class HistogramL2:
spin_axis_orientation_average: np.ndarray[np.double]
bad_time_flag_occurrences: np.ndarray

def __init__(self, l1b_dataset: xr.Dataset, pipeline_settings: PipelineSettings):
def __init__(self, good_l1b_data: xr.Dataset):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PipelineSettings has other data in it that needs to be used for future L2 functionality, so it still needs to be included in this init

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't know about future L2 functionality. Can you expand on that?

"""
Given an L1B dataset, process data into an output HistogramL2 object.

Parameters
----------
l1b_dataset : xr.Dataset
GLOWS histogram L1B dataset, as produced by glows_l1b.py.
pipeline_settings : PipelineSettings
Pipeline settings object read from ancillary file.
good_l1b_data : xr.Dataset
GLOWS histogram L1B dataset filtered by good times.
"""
active_flags = np.array(pipeline_settings.active_bad_time_flags, dtype=float)

# Select the good blocks (i.e. epoch values) according to the flags. Drop any
# bad blocks before processing.
good_data = l1b_dataset.isel(
epoch=self.return_good_times(l1b_dataset["flags"], active_flags)
)
# todo: bad angle filter
# TODO: bad angle filter
# TODO filter bad bins out. Needs to happen here while everything is still
# per-timestamp.
# per-timestamp.

self.daily_lightcurve = DailyLightcurve(good_data)
self.daily_lightcurve = DailyLightcurve(good_l1b_data)

self.total_l1b_inputs = len(good_data["epoch"])
self.number_of_good_l1b_inputs = len(good_data["epoch"])
self.total_l1b_inputs = len(good_l1b_data["epoch"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not correct - this variable should be ALL the L1B inputs, including bad times.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original variable passed in was "good_data", which only included the good times. This change is only renaming that variable. I'm not sure I follow.

self.number_of_good_l1b_inputs = len(good_l1b_data["epoch"])
self.identifier = -1 # TODO: retrieve from spin table
# TODO fill this in
self.bad_time_flag_occurrences = np.zeros((1, FLAG_LENGTH))

if len(good_data["epoch"]) != 0:
# Generate outputs that are passed in directly from L1B
self.start_time = good_data["epoch"].data[0]
self.end_time = good_data["epoch"].data[-1]
else:
# No good times in the file
self.start_time = l1b_dataset["imap_start_time"].data[0]
self.end_time = (
l1b_dataset["imap_start_time"].data[0]
+ l1b_dataset["imap_time_offset"].data[0]
)
# Generate outputs that are passed in directly from L1B
self.start_time = good_l1b_data["epoch"].data[0]
self.end_time = good_l1b_data["epoch"].data[-1]

self.filter_temperature_average = (
good_data["filter_temperature_average"]
good_l1b_data["filter_temperature_average"]
.mean(dim="epoch", keepdims=True)
.data
)
self.filter_temperature_std_dev = (
good_data["filter_temperature_average"].std(dim="epoch", keepdims=True).data
good_l1b_data["filter_temperature_average"]
.std(dim="epoch", keepdims=True)
.data
)
self.hv_voltage_average = (
good_data["hv_voltage_average"].mean(dim="epoch", keepdims=True).data
good_l1b_data["hv_voltage_average"].mean(dim="epoch", keepdims=True).data
)
self.hv_voltage_std_dev = (
good_data["hv_voltage_average"].std(dim="epoch", keepdims=True).data
good_l1b_data["hv_voltage_average"].std(dim="epoch", keepdims=True).data
)
self.spin_period_average = (
good_data["spin_period_average"].mean(dim="epoch", keepdims=True).data
good_l1b_data["spin_period_average"].mean(dim="epoch", keepdims=True).data
)
self.spin_period_std_dev = (
good_data["spin_period_average"].std(dim="epoch", keepdims=True).data
good_l1b_data["spin_period_average"].std(dim="epoch", keepdims=True).data
)
self.pulse_length_average = (
good_data["pulse_length_average"].mean(dim="epoch", keepdims=True).data
good_l1b_data["pulse_length_average"].mean(dim="epoch", keepdims=True).data
)
self.pulse_length_std_dev = (
good_data["pulse_length_average"].std(dim="epoch", keepdims=True).data
good_l1b_data["pulse_length_average"].std(dim="epoch", keepdims=True).data
)
self.spin_period_ground_average = (
good_data["spin_period_ground_average"]
good_l1b_data["spin_period_ground_average"]
.mean(dim="epoch", keepdims=True)
.data
)
self.spin_period_ground_std_dev = (
good_data["spin_period_ground_average"].std(dim="epoch", keepdims=True).data
good_l1b_data["spin_period_ground_average"]
.std(dim="epoch", keepdims=True)
.data
)
self.position_angle_offset_average = (
good_data["position_angle_offset_average"]
good_l1b_data["position_angle_offset_average"]
.mean(dim="epoch", keepdims=True)
.data
)
self.position_angle_offset_std_dev = (
good_data["position_angle_offset_average"]
good_l1b_data["position_angle_offset_average"]
.std(dim="epoch", keepdims=True)
.data
)
self.spacecraft_location_average = (
good_data["spacecraft_location_average"]
good_l1b_data["spacecraft_location_average"]
.mean(dim="epoch")
.data[np.newaxis, :]
)
self.spacecraft_location_std_dev = (
good_data["spacecraft_location_average"]
good_l1b_data["spacecraft_location_average"]
.std(dim="epoch")
.data[np.newaxis, :]
)
self.spacecraft_velocity_average = (
good_data["spacecraft_velocity_average"]
good_l1b_data["spacecraft_velocity_average"]
.mean(dim="epoch")
.data[np.newaxis, :]
)
self.spacecraft_velocity_std_dev = (
good_data["spacecraft_velocity_average"]
good_l1b_data["spacecraft_velocity_average"]
.std(dim="epoch")
.data[np.newaxis, :]
)
self.spin_axis_orientation_average = (
good_data["spin_axis_orientation_average"]
good_l1b_data["spin_axis_orientation_average"]
.mean(dim="epoch")
.data[np.newaxis, :]
)
self.spin_axis_orientation_std_dev = (
good_data["spin_axis_orientation_average"]
good_l1b_data["spin_axis_orientation_average"]
.std(dim="epoch")
.data[np.newaxis, :]
)

@classmethod
def create(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This duplicates the current init function and makes it confusing IMO (this makes L2 behave differently from the other levels.) Instead of making this a class function, why not make it a static function that can then be used at the beginning of init? Or just leaving it inside init?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't able to return None from init which is why I used a factory pattern here. To avoid returning dataset class objects that have bad data, I was trying to cut the processing early. I could raise an exception instead and catch it in glows_l2.py?

I was also trying to keep processing steps in the dataclass file to maintain the separation of concerns in the two L2 files. I interpreted glows_l2.py as creating an xarray dataset with cdf attributes while glows_l2_data.py handles filtering and processing of the data. If this isn't a strict separation of tasks, I can do this filtering before calling HistogramL2 in glows_l2.py if that's preferred

cls, l1b_dataset: xr.Dataset, pipeline_settings: PipelineSettings
) -> HistogramL2 | None:
"""
Create a HistogramL2 object from an L1B dataset, filtering out bad times.

Parameters
----------
l1b_dataset : xr.Dataset
GLOWS histogram L1B dataset, as produced by glows_l1b.py.
pipeline_settings : PipelineSettings
Pipeline settings object read from ancillary file.

Returns
-------
HistogramL2 or None
A HistogramL2 object if there are good times in the dataset,
or None if there are no good times.
"""
# Check if dataset contains good times
good_idx = cls.return_good_times(
l1b_dataset["flags"],
np.array(pipeline_settings.active_bad_time_flags, dtype=float),
)
if len(good_idx) == 0:
return None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is confusing to me that a class creation step could return None. I would prefer that this check either occurs before class creation (maybe with a static method so you could do good_data = l1b_dataset.isel(HistogramL2.select_good_times(l1b_dataset["flags"], active_bad_time_flags)) and check if good_data has a length of zero before creating HistogramL2) or occur after class creation, since all the function calls should work and be very fast with no data and you can check to see if HistogramL2.number_of_good_l1b_inputs is not zero or something

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filtering could certainly occur before class creation. I was trying to keep all processing steps out of the glows_l2.py file, but if that's not important this is an easy change. I think filtering first and then creating the object is better than creating a bad object, since those steps aren't necessary even if they're fast.


# Proceed with creating the HistogramL2 object using only the good data
filtered_data = l1b_dataset.isel(epoch=good_idx)
return cls(filtered_data)

def filter_bad_bins(self, histograms: NDArray, bin_exclusions: NDArray) -> NDArray:
"""
Filter out bad bins from the histogram.
Expand Down Expand Up @@ -372,7 +391,7 @@ def return_good_times(flags: xr.DataArray, active_flags: NDArray) -> NDArray:
An array of indices for good times.
"""
if len(active_flags) != flags.shape[1]:
print("Active flags don't matched expected length")
print("Active flags don't match expected length")

# A good time is where all the active flags are equal to one.
# Here, we mask the active indices using active_flags, and then return the times
Expand Down
21 changes: 19 additions & 2 deletions imap_processing/tests/glows/test_glows_l2.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def test_glows_l2(
mock_ancillary_exclusions,
mock_pipeline_settings,
mock_conversion_table_dict,
caplog,
):
mock_spice_function.side_effect = mock_update_spice_parameters

Expand All @@ -54,11 +55,19 @@ def test_glows_l2(
mock_pipeline_settings,
mock_conversion_table_dict,
)

# Test case 1: L1B dataset has good times
l2 = glows_l2(l1b_hist_dataset, mock_pipeline_settings)[0]
assert l2.attrs["Logical_source"] == "imap_glows_l2_hist"

assert np.allclose(l2["filter_temperature_average"].values, [57.6], rtol=0.1)

# Test case 2: L1B dataset has no good times (all flags 0)
l1b_hist_dataset["flags"].values = np.zeros(l1b_hist_dataset.flags.shape)
caplog.set_level("WARNING")
result = glows_l2(l1b_hist_dataset, mock_pipeline_settings)
assert result == []
assert any(record.levelname == "WARNING" for record in caplog.records)


@patch.object(HistogramL1B, "update_spice_parameters", autospec=True)
def test_generate_l2(
Expand All @@ -83,7 +92,9 @@ def test_generate_l2(
pipeline_settings = PipelineSettings(
mock_pipeline_settings.sel(epoch=day, method="nearest")
)
l2 = HistogramL2(l1b_hist_dataset, pipeline_settings)

# Test case 1: L1B dataset has good times
l2 = HistogramL2.create(l1b_hist_dataset, pipeline_settings)

expected_values = {
"filter_temperature_average": [57.59],
Expand All @@ -109,6 +120,12 @@ def test_generate_l2(
l2.hv_voltage_std_dev, expected_values["hv_voltage_std_dev"], 0.01
)

# Test case 2: L1B dataset has no good times (all flags 0)
l1b_hist_dataset["flags"].values = np.zeros(l1b_hist_dataset.flags.shape)
ds = HistogramL2.create(l1b_hist_dataset, pipeline_settings)
expected_ds = None
assert ds == expected_ds


def test_bin_exclusions(l1b_hists):
# TODO test excluding bins as well
Expand Down