diff --git a/doc/changes/dev/13647.newfeature.rst b/doc/changes/dev/13647.newfeature.rst new file mode 100644 index 00000000000..3b4ecc7ed53 --- /dev/null +++ b/doc/changes/dev/13647.newfeature.rst @@ -0,0 +1 @@ +Add finer-grained control to :func:`mne.io.anonymize_info` and the related methods :meth:`mne.io.Raw.anonymize`, :meth:`mne.Epochs.anonymize`, and :meth:`mne.Evoked.anonymize` by allowing ``keep_his`` to accept one or more field names; ``keep_his=True`` is unchanged and still excludes ``"his_id"``, ``"sex"``, and ``"hand"`` from anonymization, the default ``keep_his=False`` is unchanged and still anonymizes all three fields, and individual fields can now be retained by passing their names, by `Clemens Brunner`_. diff --git a/mne/_fiff/meas_info.py b/mne/_fiff/meas_info.py index 8baae82f4b0..92a4cf58531 100644 --- a/mne/_fiff/meas_info.py +++ b/mne/_fiff/meas_info.py @@ -3668,6 +3668,18 @@ def anonymize_info(info, daysback=None, keep_his=False, verbose=None): """ _validate_type(info, "info", "self") + valid_fields = {"his_id", "sex", "hand"} + if isinstance(keep_his, bool): # True means keep all fields, False means keep none + keep_fields = valid_fields if keep_his else set() + elif isinstance(keep_his, str): + _check_option("keep_his", keep_his, valid_fields) + keep_fields = {keep_his} + else: + _validate_type(keep_his, (list, tuple, set), "keep_his") + keep_fields = set(keep_his) + for field in keep_fields: + _check_option("keep_his", field, valid_fields) + default_anon_dos = datetime.datetime( 2000, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc ) @@ -3718,17 +3730,19 @@ def anonymize_info(info, daysback=None, keep_his=False, verbose=None): if subject_info is not None: if subject_info.get("id") is not None: subject_info["id"] = default_subject_id - if keep_his: + if keep_fields: logger.info( - "Not fully anonymizing info - keeping his_id, sex, and hand info" + f"Not fully anonymizing info - keeping {', '.join(sorted(keep_fields))}" + " of subject_info" ) - else: + if "his_id" 
not in keep_fields: if subject_info.get("his_id") is not None: subject_info["his_id"] = str(default_subject_id) + if "sex" not in keep_fields: if subject_info.get("sex") is not None: subject_info["sex"] = default_sex - if subject_info.get("hand") is not None: - del subject_info["hand"] # there's no "unknown" setting + if "hand" not in keep_fields: + subject_info.pop("hand", None) # there's no "unknown" setting for key in ("last_name", "first_name", "middle_name"): if subject_info.get(key) is not None: diff --git a/mne/_fiff/tests/test_meas_info.py b/mne/_fiff/tests/test_meas_info.py index d0effacde91..24a6ca04e26 100644 --- a/mne/_fiff/tests/test_meas_info.py +++ b/mne/_fiff/tests/test_meas_info.py @@ -743,7 +743,7 @@ def _test_anonymize_info(base_info, tmp_path): base_info["subject_info"].update( birthday=date(1987, 4, 8), his_id="foobar", - sex=0, + sex=1, ) # generate expected info... @@ -812,7 +812,7 @@ def _adjust_back(e_i, dt): exp_info_2 = exp_info.copy() with exp_info_2._unlock(): exp_info_2["subject_info"]["his_id"] = "foobar" - exp_info_2["subject_info"]["sex"] = 0 + exp_info_2["subject_info"]["sex"] = 1 exp_info_2["subject_info"]["hand"] = 1 # exp 3 tests is a supplied daysback @@ -842,12 +842,54 @@ def _check_equiv(got, want, err_msg): new_info = anonymize_info(base_info.copy(), keep_his=True) _check_equiv(new_info, exp_info_2, err_msg="anon keep_his mismatch") + # keep only his_id + new_info = anonymize_info(base_info.copy(), keep_his="his_id") + assert new_info["subject_info"]["his_id"] == "foobar" + assert new_info["subject_info"]["sex"] == 0 + assert "hand" not in new_info["subject_info"] + + # keep only sex + new_info = anonymize_info(base_info.copy(), keep_his="sex") + assert new_info["subject_info"]["his_id"] == "0" + assert new_info["subject_info"]["sex"] == 1 + assert "hand" not in new_info["subject_info"] + + # keep only hand + new_info = anonymize_info(base_info.copy(), keep_his="hand") + assert new_info["subject_info"]["his_id"] == "0" + 
assert new_info["subject_info"]["sex"] == 0 + assert new_info["subject_info"]["hand"] == 1 + + # keep his_id and sex + new_info = anonymize_info(base_info.copy(), keep_his=["his_id", "sex"]) + assert new_info["subject_info"]["his_id"] == "foobar" + assert new_info["subject_info"]["sex"] == 1 + assert "hand" not in new_info["subject_info"] + + # keep only hand + new_info = anonymize_info(base_info.copy(), keep_his=["hand"]) + assert new_info["subject_info"]["his_id"] == "0" + assert new_info["subject_info"]["sex"] == 0 + assert new_info["subject_info"]["hand"] == 1 + + # keep his_id and hand + new_info = anonymize_info(base_info.copy(), keep_his=("his_id", "hand")) + assert new_info["subject_info"]["his_id"] == "foobar" + assert new_info["subject_info"]["sex"] == 0 + assert new_info["subject_info"]["hand"] == 1 + + # invalid keep_his values + with pytest.raises(ValueError, match="Invalid value"): + anonymize_info(base_info.copy(), keep_his="invalid_field") + + with pytest.raises(ValueError, match="Invalid value"): + anonymize_info(base_info.copy(), keep_his=["his_id", "invalid"]) + new_info = anonymize_info(base_info.copy(), daysback=delta_t_2.days) _check_equiv(new_info, exp_info_3, err_msg="anon daysback mismatch") with pytest.raises(RuntimeError, match="anonymize_info generated"): anonymize_info(base_info.copy(), daysback=delta_t_3.days) - # assert_object_equal(new_info, exp_info_4) # test with meas_date = None with base_info._unlock(): diff --git a/mne/utils/docs.py b/mne/utils/docs.py index cf15873c2ff..92093fc0eab 100644 --- a/mne/utils/docs.py +++ b/mne/utils/docs.py @@ -249,8 +249,9 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75): - meas_date, file_id, meas_id A default value, or as specified by ``daysback``. - subject_info - Default values, except for 'birthday' which is adjusted - to maintain the subject age. + Default values, except for 'birthday', which is adjusted to maintain the subject + age. 
If ``keep_his`` is not ``False``, then the fields 'his_id', 'sex', and + 'hand' are not anonymized when selected by ``keep_his``. - experimenter, proj_name, description Default strings. - utc_offset @@ -2276,12 +2277,16 @@ def _reflow_param_docstring(docstring, has_first_line=True, width=75): # K docdict["keep_his_anonymize_info"] = """ -keep_his : bool - If ``True``, ``his_id`` of ``subject_info`` will **not** be overwritten. - Defaults to ``False``. - - .. warning:: This could mean that ``info`` is not fully - anonymized. Use with caution. +keep_his : bool | "his_id" | "sex" | "hand" | sequence of {"his_id", "sex", "hand"} + If ``True``, ``his_id``, ``sex``, and ``hand`` of ``subject_info`` will **not** be + overwritten. If ``False``, these fields will be anonymized. If ``"his_id"``, + ``"sex"``, or ``"hand"`` (or any combination thereof in a sequence), only those + fields will **not** be anonymized. Defaults to ``False``. + + .. warning:: Setting ``keep_his`` to anything other than ``False`` may result in + ``info`` not being fully anonymized. Use with caution. + .. versionchanged:: 1.12 + Added support for ``str`` and sequence of ``str``. """ docdict["kit_badcoils"] = """