diff --git a/cubids/cubids.py b/cubids/cubids.py index 0849e6ec..d34ec3da 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -1003,14 +1003,12 @@ def _purge_associations(self, scans, n_cpus=1): to_remove.append(sbref_file) # PERF-specific - if "/perf/" in str(scan): - if parse_file_entities(str(scan))["suffix"] == "asl": - context = utils.img_to_new_ext(str(scan), "_aslcontext.tsv") - if Path(context).exists(): - to_remove.append(context) - labeling = utils.img_to_new_ext(str(scan), "_asllabeling.jpg") - if Path(labeling).exists(): - to_remove.append(labeling) + if "/perf/" in str(scan) and parse_file_entities(str(scan)).get("suffix") == "asl": + scan_end = "_asl" + "".join(Path(scan).suffixes) + for associated_suffix in ("_aslcontext.tsv", "_asllabeling.jpg"): + path = str(scan).replace(scan_end, associated_suffix) + if Path(path).exists(): + to_remove.append(path) to_remove += list(scans) @@ -1047,6 +1045,9 @@ def _purge_associations(self, scans, n_cpus=1): cwd=path_prefix, ) + # Remove empty directories left after purge + utils.remove_empty_dirs_after_purge(to_remove, Path(self.path)) + self._invalidate_index() else: diff --git a/cubids/tests/test_apply.py b/cubids/tests/test_apply.py index 81ec6722..ae5848b0 100644 --- a/cubids/tests/test_apply.py +++ b/cubids/tests/test_apply.py @@ -258,4 +258,6 @@ def test_cubids_apply_intendedfor_large_dataset(tmp_path, build_bids_dataset, su with open(fmap_json) as f: metadata = json.load(f) - assert metadata["IntendedFor"] == ["ses-01/dwi/sub-ABC_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz"] + assert metadata["IntendedFor"] == [ + "ses-01/dwi/sub-ABC_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz" + ] diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 775b5419..f868aa88 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -269,6 +269,121 @@ def test_purge(tmp_path): assert not Path(json_name).exists() +def test_purge_no_files_to_remove(tmp_path, capsys): + """Purge with a scans list that references only non-existent paths. + + A purge list can reference non-existent paths (e.g., when the list is stale, files were + already removed or the list was from a previous run). When none of the listed paths exist in the + dataset, CuBIDS should skip association removals and report "Not running any + association removals". + """ + data_root = get_data(tmp_path) + bids_dir = data_root / "complete" + + # Build a purge list that points to a scan that does not exist in the dataset. + purge_path = str(tmp_path / "purge_scans.txt") + with open(purge_path, "w") as f: + f.write("sub-99/ses-99/func/nonexistent.nii.gz\n") + + bod = CuBIDS(bids_dir, use_datalad=False) + bod.purge(purge_path) + + out, _ = capsys.readouterr() + assert "Not running any association removals" in out + + +def test_purge_removes_empty_dirs(tmp_path): + """Purge removes the scan, its sidecar JSON, and prunes empty subject/session dirs. + + After deleting the only file(s) in a subject/session dir, the empty + directories (e.g. sub-01/ses-01/func and up) should be removed so the + dataset does not leave behind empty dirs. + """ + bids_root = tmp_path / "bids" + bids_root.mkdir() + (bids_root / "dataset_description.json").write_text('{"Name":"test","BIDSVersion":"1.0.0"}') + + # One subject with one BOLD scan and its JSON sidecar. + func_dir = bids_root / "sub-01" / "ses-01" / "func" + func_dir.mkdir(parents=True) + bold_nii = func_dir / "sub-01_ses-01_task-rest_bold.nii.gz" + bold_json = func_dir / "sub-01_ses-01_task-rest_bold.json" + bold_nii.touch() + bold_json.write_text("{}") + + purge_path = str(tmp_path / "purge_scans.txt") + with open(purge_path, "w") as f: + f.write("sub-01/ses-01/func/sub-01_ses-01_task-rest_bold.nii.gz\n") + + bod = CuBIDS(str(bids_root), use_datalad=False) + bod.purge(purge_path) + + # Scan and sidecar are gone; empty subject tree is removed. + assert not bold_nii.exists() + assert not bold_json.exists() + assert not (bids_root / "sub-01").exists() + + +def test_purge_perf_asl_removes_aslcontext_and_asllabeling(tmp_path, build_bids_dataset): + """Purging a PERF ASL scan also removes its BIDS companion files.""" + bids_root = build_bids_dataset( + tmp_path=tmp_path, + dataset_name="perf_purge", + skeleton_name="skeleton_perf_m0.yml", + ) + sub, ses = "sub-01", "ses-01" + perf_dir = bids_root / sub / ses / "perf" + asl_nii = perf_dir / f"{sub}_{ses}_asl.nii.gz" + asl_json = perf_dir / f"{sub}_{ses}_asl.json" + aslcontext = perf_dir / f"{sub}_{ses}_aslcontext.tsv" + asllabeling = perf_dir / f"{sub}_{ses}_asllabeling.jpg" + + # Add the ASL sidecar and companion files that BIDS expects alongside the ASL NIfTI. + asl_json.write_text("{}") + aslcontext.write_text("label\ncontrol\n") + asllabeling.write_bytes(b"fake jpg") + assert asl_nii.exists() + assert asl_json.exists() + assert aslcontext.exists() + assert asllabeling.exists() + + purge_path = str(tmp_path / "purge_scans.txt") + with open(purge_path, "w") as f: + f.write(f"{sub}/{ses}/perf/{sub}_{ses}_asl.nii.gz\n") + + bod = CuBIDS(str(bids_root), use_datalad=False) + bod.purge(purge_path) + + # Main ASL file, its JSON sidecar, and both companion files must be removed. + assert not asl_nii.exists() + assert not asl_json.exists() + assert not aslcontext.exists() + assert not asllabeling.exists() + + +def test_purge_parameter_groups_message(tmp_path): + """ Test that_purge_associations works when scans_txt is None. + + scans_txt is None when purge is triggered from the apply workflow (Parameter + Group deletions): that path calls _purge_associations(to_remove) directly and + never purge(scans_txt). + + This test ensures that with scans_txt is None we still remove the scan and its + associations (and the "Parameter Groups" commit message is used when datalad + is enabled). + """ + data_root = get_data(tmp_path) + bids_dir = data_root / "complete" + scan_name = "sub-03/ses-phdiff/func/sub-03_ses-phdiff_task-rest_bold.nii.gz" + full_path = str(bids_dir / scan_name) + + bod = CuBIDS(bids_dir, use_datalad=False) + assert bod.scans_txt is None # No scans.txt → Parameter Groups path. + + bod._purge_associations([full_path]) + assert not Path(full_path).exists() + + def test_bad_json_merge(tmp_path): """Test that an unsuccessful merge returns an error. @@ -929,6 +1044,7 @@ def test_apply_tsv_changes(tmp_path): if "bold" in str(full_path): # Construct physio file paths by replacing the scan suffix with _physio from bids.layout import parse_file_entities + old_suffix = parse_file_entities(str(full_path))["suffix"] old_ext = "".join(full_path.suffixes) scan_end = "_" + old_suffix + old_ext @@ -1166,9 +1282,9 @@ def test_validator(tmp_path): call = build_validator_call(str(data_root) + "/complete") ret = run_validator(call) - assert ret.returncode == 16, ( + assert ret.returncode != 0, ( "Validator was expected to fail after corrupting files, " - f"but returned code {ret.returncode}.\n" + f"this returned code {ret.returncode}.\n" "Corrupted files: removed JSON sidecar and modified NIfTI header.\n" f"STDOUT:\n{ret.stdout.decode('UTF-8', errors='replace')}\n" f"STDERR:\n{ret.stderr.decode('UTF-8', errors='replace') if getattr(ret, 'stderr', None) else ''}" diff --git a/cubids/tests/test_cli.py b/cubids/tests/test_cli.py index 6fdd707b..c98bcbc8 100644 --- a/cubids/tests/test_cli.py +++ b/cubids/tests/test_cli.py @@ -263,7 +263,17 @@ def test_validate_subject_scope_with_n_cpus(tmp_path, build_bids_dataset): output_prefix = tmp_path / "validation_parallel" # This should complete without error - _main(["validate", str(bids_dir), str(output_prefix), "--validation-scope", "subject", "--n-cpus", "1"]) + _main( + [ + "validate", + str(bids_dir), + str(output_prefix), + "--validation-scope", + "subject", + "--n-cpus", + "1", + ] + ) # Verify the command completed successfully by checking if the output files exist assert (output_prefix.parent / f"{output_prefix.name}_validation.tsv").exists() @@ -314,7 +324,9 @@ def test_add_nifti_info_command_with_test_dataset(tmp_path): def test_print_metadata_fields_command_with_test_dataset(tmp_path, capsys, build_bids_dataset): """Test the print-metadata-fields command with the test BIDS dataset.""" - bids_dir = _build_cli_dataset(tmp_path, build_bids_dataset, dataset_name="metadata_fields_dataset") + bids_dir = _build_cli_dataset( + tmp_path, build_bids_dataset, dataset_name="metadata_fields_dataset" + ) # Run print-metadata-fields _main(["print-metadata-fields", str(bids_dir)]) @@ -327,7 +339,9 @@ def test_print_metadata_fields_command_with_test_dataset(tmp_path, capsys, build def test_remove_metadata_fields_command_with_test_dataset(tmp_path, build_bids_dataset): """Test the remove-metadata-fields command with the test BIDS dataset.""" - bids_dir = _build_cli_dataset(tmp_path, build_bids_dataset, dataset_name="remove_metadata_dataset") + bids_dir = _build_cli_dataset( + tmp_path, build_bids_dataset, dataset_name="remove_metadata_dataset" + ) # Get a sample JSON sidecar json_file = next(bids_dir.rglob("*.json")) diff --git a/cubids/tests/test_utils.py b/cubids/tests/test_utils.py index 0510cba2..a777ab86 100644 --- a/cubids/tests/test_utils.py +++ b/cubids/tests/test_utils.py @@ -1,5 +1,8 @@ """Tests for the utils module.""" +from pathlib import Path +from unittest.mock import patch + import pandas as pd from cubids import utils @@ -156,7 +159,6 @@ def test_cluster_single_parameters(): [0, 0, 0, 0, 0, 0, 1, 2], ) - # Change the tolerance for SliceTiming config["sidecar_params"]["func"]["SliceTiming"]["tolerance"] = 0.5 out_df = utils.cluster_single_parameters( @@ -201,3 +203,33 @@ def test_ignore_entities_in_entity_set(tmp_path): finally: NON_KEY_ENTITIES.clear() NON_KEY_ENTITIES.update(original_non_key) + + +def test_remove_empty_dirs_after_purge_removes_empty_dirs(tmp_path): + """Test that remove_empty_dirs_after_purge removes empty parent dirs up to BIDS root.""" + bids_root = tmp_path / "bids" + func_dir = bids_root / "sub-01" / "ses-01" / "func" + func_dir.mkdir(parents=True) + removed_file = func_dir / "file.nii.gz" + removed_file.touch() + removed_file.unlink() # File already removed; dirs are now empty + utils.remove_empty_dirs_after_purge([str(removed_file)], bids_root) + assert not (bids_root / "sub-01").exists() + assert not (bids_root / "sub-01" / "ses-01").exists() + assert not func_dir.exists() + + +def test_remove_empty_dirs_after_purge_handles_rmdir_error(tmp_path, capsys): + """Test that remove_empty_dirs_after_purge catches OSError on rmdir and prints message.""" + bids_root = tmp_path / "bids" + (bids_root / "sub-01" / "ses-01" / "func").mkdir(parents=True) + removed_file = bids_root / "sub-01" / "ses-01" / "func" / "file.nii.gz" + removed_file.touch() + removed_file.unlink() + removed_paths = [str(removed_file)] + + with patch.object(Path, "rmdir", side_effect=OSError(39, "Directory not empty")): + utils.remove_empty_dirs_after_purge(removed_paths, bids_root) + + out, _ = capsys.readouterr() + assert "Failed to remove directory" in out diff --git a/cubids/utils.py b/cubids/utils.py index f42caeda..4d824da0 100644 --- a/cubids/utils.py +++ b/cubids/utils.py @@ -754,6 +754,37 @@ def img_to_new_ext(img_path, new_ext): return img_path.replace(".nii.gz", "").replace(".nii", "") + new_ext +def remove_empty_dirs_after_purge(removed_paths, bids_root): + """Remove directories that are empty after purging the given files. + + Walks from the deepest directory up toward the BIDS root and removes + any directory that is empty. + + Parameters + ---------- + removed_paths : list of str or Path + Paths to files that were removed by purge. + bids_root : Path or str + Root of the BIDS dataset; directories are only removed inside this tree. + """ + bids_root = Path(bids_root).resolve() + directories_to_check = set() + for removed_path in removed_paths: + parent_dir = Path(removed_path).resolve().parent + while parent_dir != bids_root and bids_root in parent_dir.parents: + directories_to_check.add(parent_dir) + parent_dir = parent_dir.parent + # Process deepest dirs first so parent can become empty after child is removed + for directory in sorted( + directories_to_check, key=lambda dir_path: len(dir_path.parts), reverse=True + ): + if directory.exists() and directory.is_dir() and not any(directory.iterdir()): + try: + directory.rmdir() + except OSError as e: + print(f"Failed to remove directory {directory}: {e}") + + def get_entity_value(path, key): """Given a filepath and BIDS key name, return the value associated with the key.