Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions cubids/cubids.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,14 +1003,12 @@ def _purge_associations(self, scans, n_cpus=1):
to_remove.append(sbref_file)

# PERF-specific
if "/perf/" in str(scan):
if parse_file_entities(str(scan))["suffix"] == "asl":
context = utils.img_to_new_ext(str(scan), "_aslcontext.tsv")
if Path(context).exists():
to_remove.append(context)
labeling = utils.img_to_new_ext(str(scan), "_asllabeling.jpg")
if Path(labeling).exists():
to_remove.append(labeling)
if "/perf/" in str(scan) and parse_file_entities(str(scan)).get("suffix") == "asl":
scan_end = "_asl" + "".join(Path(scan).suffixes)
for associated_suffix in ("_aslcontext.tsv", "_asllabeling.jpg"):
path = str(scan).replace(scan_end, associated_suffix)
if Path(path).exists():
to_remove.append(path)

to_remove += list(scans)

Expand Down Expand Up @@ -1047,6 +1045,9 @@ def _purge_associations(self, scans, n_cpus=1):
cwd=path_prefix,
)

# Remove empty directories left after purge
utils.remove_empty_dirs_after_purge(to_remove, Path(self.path))

self._invalidate_index()

else:
Expand Down
4 changes: 3 additions & 1 deletion cubids/tests/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,4 +258,6 @@ def test_cubids_apply_intendedfor_large_dataset(tmp_path, build_bids_dataset, su
with open(fmap_json) as f:
metadata = json.load(f)

assert metadata["IntendedFor"] == ["ses-01/dwi/sub-ABC_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz"]
assert metadata["IntendedFor"] == [
"ses-01/dwi/sub-ABC_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz"
]
120 changes: 118 additions & 2 deletions cubids/tests/test_bond.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,121 @@ def test_purge(tmp_path):
assert not Path(json_name).exists()


def test_purge_no_files_to_remove(tmp_path, capsys):
    """Purge with a scans list that references only non-existent paths.

    A purge list can reference paths that are absent from the dataset (e.g. a
    stale list, files already removed, or a list from a previous run). When no
    listed path exists, CuBIDS should skip association removals entirely and
    report "Not running any association removals".
    """
    data_root = get_data(tmp_path)
    bids_dir = data_root / "complete"

    # Point the purge list at a scan that is absent from the dataset.
    purge_file = tmp_path / "purge_scans.txt"
    with open(str(purge_file), "w") as handle:
        handle.write("sub-99/ses-99/func/nonexistent.nii.gz\n")

    bod = CuBIDS(bids_dir, use_datalad=False)
    bod.purge(str(purge_file))

    captured, _ = capsys.readouterr()
    assert "Not running any association removals" in captured


def test_purge_removes_empty_dirs(tmp_path):
    """Purge removes the scan, its sidecar JSON, and prunes empty subject/session dirs.

    After the only file(s) in a subject/session directory are deleted, the
    now-empty directories (e.g. sub-01/ses-01/func and its parents) should be
    removed so the dataset does not leave empty directories behind.
    """
    bids_root = tmp_path / "bids"
    bids_root.mkdir()
    (bids_root / "dataset_description.json").write_text('{"Name":"test","BIDSVersion":"1.0.0"}')

    # Single subject/session holding one BOLD scan plus its JSON sidecar.
    func_dir = bids_root / "sub-01" / "ses-01" / "func"
    func_dir.mkdir(parents=True)
    scan = func_dir / "sub-01_ses-01_task-rest_bold.nii.gz"
    sidecar = func_dir / "sub-01_ses-01_task-rest_bold.json"
    scan.touch()
    sidecar.write_text("{}")

    purge_file = tmp_path / "purge_scans.txt"
    with open(str(purge_file), "w") as handle:
        handle.write("sub-01/ses-01/func/sub-01_ses-01_task-rest_bold.nii.gz\n")

    bod = CuBIDS(str(bids_root), use_datalad=False)
    bod.purge(str(purge_file))

    # Scan and sidecar are gone, and the emptied subject tree was pruned.
    assert not scan.exists()
    assert not sidecar.exists()
    assert not (bids_root / "sub-01").exists()


def test_purge_perf_asl_removes_aslcontext_and_asllabeling(tmp_path, build_bids_dataset):
    """Purging a PERF ASL scan also removes its BIDS companion files."""
    bids_root = build_bids_dataset(
        tmp_path=tmp_path,
        dataset_name="perf_purge",
        skeleton_name="skeleton_perf_m0.yml",
    )
    subject, session = "sub-01", "ses-01"
    perf_dir = bids_root / subject / session / "perf"
    asl_nii = perf_dir / f"{subject}_{session}_asl.nii.gz"
    asl_json = perf_dir / f"{subject}_{session}_asl.json"
    aslcontext = perf_dir / f"{subject}_{session}_aslcontext.tsv"
    asllabeling = perf_dir / f"{subject}_{session}_asllabeling.jpg"

    # Create the sidecar and companion files BIDS expects next to the ASL NIfTI.
    asl_json.write_text("{}")
    aslcontext.write_text("label\ncontrol\n")
    asllabeling.write_bytes(b"fake jpg")
    for created in (asl_nii, asl_json, aslcontext, asllabeling):
        assert created.exists()

    purge_file = tmp_path / "purge_scans.txt"
    with open(str(purge_file), "w") as handle:
        handle.write(f"{subject}/{session}/perf/{subject}_{session}_asl.nii.gz\n")

    bod = CuBIDS(str(bids_root), use_datalad=False)
    bod.purge(str(purge_file))

    # The ASL image, its JSON sidecar, and both companion files must all be gone.
    for removed in (asl_nii, asl_json, aslcontext, asllabeling):
        assert not removed.exists()


def test_purge_parameter_groups_message(tmp_path):
    """Test that _purge_associations works when scans_txt is None.

    scans_txt is None when purge is triggered from the apply workflow (Parameter
    Group deletions): that path calls _purge_associations(to_remove) directly and
    never purge(scans_txt).

    This test ensures that with scans_txt as None we still remove the scan and its
    associations (and the "Parameter Groups" commit message is used when datalad
    is enabled).
    """
    data_root = get_data(tmp_path)
    bids_dir = data_root / "complete"
    relative_scan = "sub-03/ses-phdiff/func/sub-03_ses-phdiff_task-rest_bold.nii.gz"
    scan_path = bids_dir / relative_scan

    bod = CuBIDS(bids_dir, use_datalad=False)
    # No scans.txt means the Parameter Groups code path is exercised.
    assert bod.scans_txt is None

    bod._purge_associations([str(scan_path)])
    assert not scan_path.exists()


def test_bad_json_merge(tmp_path):
"""Test that an unsuccessful merge returns an error.

Expand Down Expand Up @@ -929,6 +1044,7 @@ def test_apply_tsv_changes(tmp_path):
if "bold" in str(full_path):
# Construct physio file paths by replacing the scan suffix with _physio
from bids.layout import parse_file_entities

old_suffix = parse_file_entities(str(full_path))["suffix"]
old_ext = "".join(full_path.suffixes)
scan_end = "_" + old_suffix + old_ext
Expand Down Expand Up @@ -1166,9 +1282,9 @@ def test_validator(tmp_path):
call = build_validator_call(str(data_root) + "/complete")
ret = run_validator(call)

assert ret.returncode == 16, (
assert ret.returncode != 0, (
"Validator was expected to fail after corrupting files, "
f"but returned code {ret.returncode}.\n"
f"this returned code {ret.returncode}.\n"
"Corrupted files: removed JSON sidecar and modified NIfTI header.\n"
f"STDOUT:\n{ret.stdout.decode('UTF-8', errors='replace')}\n"
f"STDERR:\n{ret.stderr.decode('UTF-8', errors='replace') if getattr(ret, 'stderr', None) else ''}"
Expand Down
20 changes: 17 additions & 3 deletions cubids/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,17 @@ def test_validate_subject_scope_with_n_cpus(tmp_path, build_bids_dataset):
output_prefix = tmp_path / "validation_parallel"

# This should complete without error
_main(["validate", str(bids_dir), str(output_prefix), "--validation-scope", "subject", "--n-cpus", "1"])
_main(
[
"validate",
str(bids_dir),
str(output_prefix),
"--validation-scope",
"subject",
"--n-cpus",
"1",
]
)

# Verify the command completed successfully by checking if the output files exist
assert (output_prefix.parent / f"{output_prefix.name}_validation.tsv").exists()
Expand Down Expand Up @@ -314,7 +324,9 @@ def test_add_nifti_info_command_with_test_dataset(tmp_path):

def test_print_metadata_fields_command_with_test_dataset(tmp_path, capsys, build_bids_dataset):
"""Test the print-metadata-fields command with the test BIDS dataset."""
bids_dir = _build_cli_dataset(tmp_path, build_bids_dataset, dataset_name="metadata_fields_dataset")
bids_dir = _build_cli_dataset(
tmp_path, build_bids_dataset, dataset_name="metadata_fields_dataset"
)

# Run print-metadata-fields
_main(["print-metadata-fields", str(bids_dir)])
Expand All @@ -327,7 +339,9 @@ def test_print_metadata_fields_command_with_test_dataset(tmp_path, capsys, build

def test_remove_metadata_fields_command_with_test_dataset(tmp_path, build_bids_dataset):
"""Test the remove-metadata-fields command with the test BIDS dataset."""
bids_dir = _build_cli_dataset(tmp_path, build_bids_dataset, dataset_name="remove_metadata_dataset")
bids_dir = _build_cli_dataset(
tmp_path, build_bids_dataset, dataset_name="remove_metadata_dataset"
)

# Get a sample JSON sidecar
json_file = next(bids_dir.rglob("*.json"))
Expand Down
34 changes: 33 additions & 1 deletion cubids/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""Tests for the utils module."""

from pathlib import Path
from unittest.mock import patch

import pandas as pd

from cubids import utils
Expand Down Expand Up @@ -156,7 +159,6 @@ def test_cluster_single_parameters():
[0, 0, 0, 0, 0, 0, 1, 2],
)


# Change the tolerance for SliceTiming
config["sidecar_params"]["func"]["SliceTiming"]["tolerance"] = 0.5
out_df = utils.cluster_single_parameters(
Expand Down Expand Up @@ -201,3 +203,33 @@ def test_ignore_entities_in_entity_set(tmp_path):
finally:
NON_KEY_ENTITIES.clear()
NON_KEY_ENTITIES.update(original_non_key)


def test_remove_empty_dirs_after_purge_removes_empty_dirs(tmp_path):
    """Test that remove_empty_dirs_after_purge removes empty parent dirs up to BIDS root."""
    bids_root = tmp_path / "bids"
    func_dir = bids_root / "sub-01" / "ses-01" / "func"
    func_dir.mkdir(parents=True)
    # Simulate a purge: the file existed but was already deleted, leaving
    # the whole subject tree empty.
    purged_file = func_dir / "file.nii.gz"
    purged_file.touch()
    purged_file.unlink()

    utils.remove_empty_dirs_after_purge([str(purged_file)], bids_root)

    for leftover in (func_dir, bids_root / "sub-01" / "ses-01", bids_root / "sub-01"):
        assert not leftover.exists()


def test_remove_empty_dirs_after_purge_handles_rmdir_error(tmp_path, capsys):
    """Test that remove_empty_dirs_after_purge catches OSError on rmdir and prints message."""
    bids_root = tmp_path / "bids"
    func_dir = bids_root / "sub-01" / "ses-01" / "func"
    func_dir.mkdir(parents=True)
    purged_file = func_dir / "file.nii.gz"
    purged_file.touch()
    purged_file.unlink()

    # Force every rmdir call to fail as if the directory were not empty.
    with patch.object(Path, "rmdir", side_effect=OSError(39, "Directory not empty")):
        utils.remove_empty_dirs_after_purge([str(purged_file)], bids_root)

    captured, _ = capsys.readouterr()
    assert "Failed to remove directory" in captured
31 changes: 31 additions & 0 deletions cubids/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,37 @@ def img_to_new_ext(img_path, new_ext):
return img_path.replace(".nii.gz", "").replace(".nii", "") + new_ext


def remove_empty_dirs_after_purge(removed_paths, bids_root):
    """Prune directories left empty after purging the given files.

    Collects every ancestor directory of each removed file that lies strictly
    inside the BIDS root, then removes the ones that are empty, working from
    the deepest level upward so a parent emptied by a child's removal is also
    pruned. The BIDS root itself is never removed.

    Parameters
    ----------
    removed_paths : list of str or Path
        Paths to files that were removed by purge.
    bids_root : Path or str
        Root of the BIDS dataset; directories are only removed inside this tree.
    """
    root = Path(bids_root).resolve()
    candidates = set()
    for removed in removed_paths:
        ancestor = Path(removed).resolve().parent
        # Climb toward the root, recording each directory strictly below it.
        while ancestor != root and root in ancestor.parents:
            candidates.add(ancestor)
            ancestor = ancestor.parent
    # Deepest-first ordering lets a parent become empty once its child is gone.
    deepest_first = sorted(candidates, key=lambda p: len(p.parts), reverse=True)
    for candidate in deepest_first:
        # is_dir() is False for missing paths, so this also covers existence.
        if not candidate.is_dir():
            continue
        if any(candidate.iterdir()):
            continue
        try:
            candidate.rmdir()
        except OSError as e:
            print(f"Failed to remove directory {candidate}: {e}")


def get_entity_value(path, key):
"""Given a filepath and BIDS key name, return the value associated with the key.

Expand Down