Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions cubids/cubids.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,14 +1003,12 @@ def _purge_associations(self, scans, n_cpus=1):
to_remove.append(sbref_file)

# PERF-specific
if "/perf/" in str(scan):
if parse_file_entities(str(scan))["suffix"] == "asl":
context = utils.img_to_new_ext(str(scan), "_aslcontext.tsv")
if Path(context).exists():
to_remove.append(context)
labeling = utils.img_to_new_ext(str(scan), "_asllabeling.jpg")
if Path(labeling).exists():
to_remove.append(labeling)
if "/perf/" in str(scan) and parse_file_entities(str(scan)).get("suffix") == "asl":
scan_end = "_asl" + "".join(Path(scan).suffixes)
for associated_suffix in ("_aslcontext.tsv", "_asllabeling.jpg"):
path = str(scan).replace(scan_end, associated_suffix)
if Path(path).exists():
to_remove.append(path)

to_remove += list(scans)

Expand Down Expand Up @@ -1047,6 +1045,9 @@ def _purge_associations(self, scans, n_cpus=1):
cwd=path_prefix,
)

# Remove empty directories left after purge
utils.remove_empty_dirs_after_purge(to_remove, Path(self.path))

self._invalidate_index()

else:
Expand Down
4 changes: 3 additions & 1 deletion cubids/tests/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,4 +258,6 @@ def test_cubids_apply_intendedfor_large_dataset(tmp_path, build_bids_dataset, su
with open(fmap_json) as f:
metadata = json.load(f)

assert metadata["IntendedFor"] == ["ses-01/dwi/sub-ABC_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz"]
assert metadata["IntendedFor"] == [
"ses-01/dwi/sub-ABC_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz"
]
120 changes: 118 additions & 2 deletions cubids/tests/test_bond.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,121 @@ def test_purge(tmp_path):
assert not Path(json_name).exists()


def test_purge_no_files_to_remove(tmp_path, capsys):
    """Purge with a scans list that references only non-existent paths.

    A purge list can reference paths that are absent from the dataset (e.g. a
    stale list, files already removed, or a list from a previous run). When no
    listed path exists, CuBIDS should skip association removals entirely and
    report "Not running any association removals".
    """
    data_root = get_data(tmp_path)
    bids_dir = data_root / "complete"

    # Point the purge list at a scan that is absent from the dataset.
    purge_file = tmp_path / "purge_scans.txt"
    with open(str(purge_file), "w") as handle:
        handle.write("sub-99/ses-99/func/nonexistent.nii.gz\n")

    bod = CuBIDS(bids_dir, use_datalad=False)
    bod.purge(str(purge_file))

    captured, _ = capsys.readouterr()
    assert "Not running any association removals" in captured


def test_purge_removes_empty_dirs(tmp_path):
    """Purge removes the scan, its sidecar JSON, and prunes empty subject/session dirs.

    After the only file(s) in a subject/session directory are deleted, the
    now-empty directories (e.g. sub-01/ses-01/func and its parents) should be
    removed so the dataset does not leave empty directories behind.
    """
    bids_root = tmp_path / "bids"
    bids_root.mkdir()
    (bids_root / "dataset_description.json").write_text('{"Name":"test","BIDSVersion":"1.0.0"}')

    # Single subject/session holding one BOLD scan plus its JSON sidecar.
    func_dir = bids_root / "sub-01" / "ses-01" / "func"
    func_dir.mkdir(parents=True)
    scan = func_dir / "sub-01_ses-01_task-rest_bold.nii.gz"
    sidecar = func_dir / "sub-01_ses-01_task-rest_bold.json"
    scan.touch()
    sidecar.write_text("{}")

    purge_file = tmp_path / "purge_scans.txt"
    with open(str(purge_file), "w") as handle:
        handle.write("sub-01/ses-01/func/sub-01_ses-01_task-rest_bold.nii.gz\n")

    bod = CuBIDS(str(bids_root), use_datalad=False)
    bod.purge(str(purge_file))

    # Scan and sidecar are gone, and the emptied subject tree was pruned.
    assert not scan.exists()
    assert not sidecar.exists()
    assert not (bids_root / "sub-01").exists()


def test_purge_perf_asl_removes_aslcontext_and_asllabeling(tmp_path, build_bids_dataset):
    """Purging a PERF ASL scan also removes its BIDS companion files."""
    bids_root = build_bids_dataset(
        tmp_path=tmp_path,
        dataset_name="perf_purge",
        skeleton_name="skeleton_perf_m0.yml",
    )
    subject, session = "sub-01", "ses-01"
    perf_dir = bids_root / subject / session / "perf"
    asl_nii = perf_dir / f"{subject}_{session}_asl.nii.gz"
    asl_json = perf_dir / f"{subject}_{session}_asl.json"
    aslcontext = perf_dir / f"{subject}_{session}_aslcontext.tsv"
    asllabeling = perf_dir / f"{subject}_{session}_asllabeling.jpg"

    # Create the sidecar and companion files BIDS expects next to the ASL NIfTI.
    asl_json.write_text("{}")
    aslcontext.write_text("label\ncontrol\n")
    asllabeling.write_bytes(b"fake jpg")
    for created in (asl_nii, asl_json, aslcontext, asllabeling):
        assert created.exists()

    purge_file = tmp_path / "purge_scans.txt"
    with open(str(purge_file), "w") as handle:
        handle.write(f"{subject}/{session}/perf/{subject}_{session}_asl.nii.gz\n")

    bod = CuBIDS(str(bids_root), use_datalad=False)
    bod.purge(str(purge_file))

    # The ASL image, its JSON sidecar, and both companion files must all be gone.
    for removed in (asl_nii, asl_json, aslcontext, asllabeling):
        assert not removed.exists()


def test_purge_parameter_groups_message(tmp_path):
    """Test that _purge_associations works when scans_txt is None.

    scans_txt is None when purge is triggered from the apply workflow (Parameter
    Group deletions): that path calls _purge_associations(to_remove) directly and
    never purge(scans_txt).

    This test ensures that with scans_txt as None we still remove the scan and its
    associations (and the "Parameter Groups" commit message is used when datalad
    is enabled).
    """
    data_root = get_data(tmp_path)
    bids_dir = data_root / "complete"
    relative_scan = "sub-03/ses-phdiff/func/sub-03_ses-phdiff_task-rest_bold.nii.gz"
    scan_path = bids_dir / relative_scan

    bod = CuBIDS(bids_dir, use_datalad=False)
    # No scans.txt means the Parameter Groups code path is exercised.
    assert bod.scans_txt is None

    bod._purge_associations([str(scan_path)])
    assert not scan_path.exists()


def test_bad_json_merge(tmp_path):
"""Test that an unsuccessful merge returns an error.

Expand Down Expand Up @@ -929,6 +1044,7 @@ def test_apply_tsv_changes(tmp_path):
if "bold" in str(full_path):
# Construct physio file paths by replacing the scan suffix with _physio
from bids.layout import parse_file_entities

old_suffix = parse_file_entities(str(full_path))["suffix"]
old_ext = "".join(full_path.suffixes)
scan_end = "_" + old_suffix + old_ext
Expand Down Expand Up @@ -1166,9 +1282,9 @@ def test_validator(tmp_path):
call = build_validator_call(str(data_root) + "/complete")
ret = run_validator(call)

assert ret.returncode == 16, (
assert ret.returncode != 0, (
"Validator was expected to fail after corrupting files, "
f"but returned code {ret.returncode}.\n"
f"this returned code {ret.returncode}.\n"
"Corrupted files: removed JSON sidecar and modified NIfTI header.\n"
f"STDOUT:\n{ret.stdout.decode('UTF-8', errors='replace')}\n"
f"STDERR:\n{ret.stderr.decode('UTF-8', errors='replace') if getattr(ret, 'stderr', None) else ''}"
Expand Down
20 changes: 17 additions & 3 deletions cubids/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,17 @@ def test_validate_subject_scope_with_n_cpus(tmp_path, build_bids_dataset):
output_prefix = tmp_path / "validation_parallel"

# This should complete without error
_main(["validate", str(bids_dir), str(output_prefix), "--validation-scope", "subject", "--n-cpus", "1"])
_main(
[
"validate",
str(bids_dir),
str(output_prefix),
"--validation-scope",
"subject",
"--n-cpus",
"1",
]
)

# Verify the command completed successfully by checking if the output files exist
assert (output_prefix.parent / f"{output_prefix.name}_validation.tsv").exists()
Expand Down Expand Up @@ -314,7 +324,9 @@ def test_add_nifti_info_command_with_test_dataset(tmp_path):

def test_print_metadata_fields_command_with_test_dataset(tmp_path, capsys, build_bids_dataset):
"""Test the print-metadata-fields command with the test BIDS dataset."""
bids_dir = _build_cli_dataset(tmp_path, build_bids_dataset, dataset_name="metadata_fields_dataset")
bids_dir = _build_cli_dataset(
tmp_path, build_bids_dataset, dataset_name="metadata_fields_dataset"
)

# Run print-metadata-fields
_main(["print-metadata-fields", str(bids_dir)])
Expand All @@ -327,7 +339,9 @@ def test_print_metadata_fields_command_with_test_dataset(tmp_path, capsys, build

def test_remove_metadata_fields_command_with_test_dataset(tmp_path, build_bids_dataset):
"""Test the remove-metadata-fields command with the test BIDS dataset."""
bids_dir = _build_cli_dataset(tmp_path, build_bids_dataset, dataset_name="remove_metadata_dataset")
bids_dir = _build_cli_dataset(
tmp_path, build_bids_dataset, dataset_name="remove_metadata_dataset"
)

# Get a sample JSON sidecar
json_file = next(bids_dir.rglob("*.json"))
Expand Down
34 changes: 33 additions & 1 deletion cubids/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""Tests for the utils module."""

from pathlib import Path
from unittest.mock import patch

import pandas as pd

from cubids import utils
Expand Down Expand Up @@ -156,7 +159,6 @@ def test_cluster_single_parameters():
[0, 0, 0, 0, 0, 0, 1, 2],
)


# Change the tolerance for SliceTiming
config["sidecar_params"]["func"]["SliceTiming"]["tolerance"] = 0.5
out_df = utils.cluster_single_parameters(
Expand Down Expand Up @@ -201,3 +203,33 @@ def test_ignore_entities_in_entity_set(tmp_path):
finally:
NON_KEY_ENTITIES.clear()
NON_KEY_ENTITIES.update(original_non_key)


def test_remove_empty_dirs_after_purge_removes_empty_dirs(tmp_path):
    """Test that remove_empty_dirs_after_purge removes empty parent dirs up to BIDS root."""
    bids_root = tmp_path / "bids"
    func_dir = bids_root / "sub-01" / "ses-01" / "func"
    func_dir.mkdir(parents=True)
    # Simulate a purge: the file existed but was already deleted, leaving
    # the whole subject tree empty.
    purged_file = func_dir / "file.nii.gz"
    purged_file.touch()
    purged_file.unlink()

    utils.remove_empty_dirs_after_purge([str(purged_file)], bids_root)

    for leftover in (func_dir, bids_root / "sub-01" / "ses-01", bids_root / "sub-01"):
        assert not leftover.exists()


def test_remove_empty_dirs_after_purge_handles_rmdir_error(tmp_path, capsys):
    """Test that remove_empty_dirs_after_purge catches OSError on rmdir and prints message."""
    bids_root = tmp_path / "bids"
    func_dir = bids_root / "sub-01" / "ses-01" / "func"
    func_dir.mkdir(parents=True)
    purged_file = func_dir / "file.nii.gz"
    purged_file.touch()
    purged_file.unlink()

    # Force every rmdir call to fail as if the directory were not empty.
    with patch.object(Path, "rmdir", side_effect=OSError(39, "Directory not empty")):
        utils.remove_empty_dirs_after_purge([str(purged_file)], bids_root)

    captured, _ = capsys.readouterr()
    assert "Failed to remove directory" in captured
31 changes: 31 additions & 0 deletions cubids/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,37 @@ def img_to_new_ext(img_path, new_ext):
return img_path.replace(".nii.gz", "").replace(".nii", "") + new_ext


def remove_empty_dirs_after_purge(removed_paths, bids_root):
    """Prune directories left empty after purging the given files.

    Collects every ancestor directory of each removed file that lies strictly
    inside the BIDS root, then removes the ones that are empty, working from
    the deepest level upward so a parent emptied by a child's removal is also
    pruned. The BIDS root itself is never removed.

    Parameters
    ----------
    removed_paths : list of str or Path
        Paths to files that were removed by purge.
    bids_root : Path or str
        Root of the BIDS dataset; directories are only removed inside this tree.
    """
    root = Path(bids_root).resolve()
    candidates = set()
    for removed in removed_paths:
        ancestor = Path(removed).resolve().parent
        # Climb toward the root, recording each directory strictly below it.
        while ancestor != root and root in ancestor.parents:
            candidates.add(ancestor)
            ancestor = ancestor.parent
    # Deepest-first ordering lets a parent become empty once its child is gone.
    deepest_first = sorted(candidates, key=lambda p: len(p.parts), reverse=True)
    for candidate in deepest_first:
        # is_dir() is False for missing paths, so this also covers existence.
        if not candidate.is_dir():
            continue
        if any(candidate.iterdir()):
            continue
        try:
            candidate.rmdir()
        except OSError as e:
            print(f"Failed to remove directory {candidate}: {e}")


def get_entity_value(path, key):
"""Given a filepath and BIDS key name, return the value associated with the key.

Expand Down