diff --git a/README.rst b/README.rst index 95cd3cc..529b164 100644 --- a/README.rst +++ b/README.rst @@ -18,6 +18,17 @@ Note: we have different importers and some importers may not support all the fea Currently supported import formats ================================== +`BorgBackup `_ +-------------------------------------------------- + +Imports archives from an existing Borg repository into a new one. +This is useful when a Borg repository needs to be rebuilt (e.g. if +your borg key and passphrase were compromised). + +Usage: ``borg-import borg SOURCE_REPOSITORY DESTINATION_REPOSITORY`` + +See ``borg-import borg -h`` for help. + `rsnapshot `_ ----------------------------------------------------- diff --git a/docs/usage.rst b/docs/usage.rst index a22804b..307ca2e 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -24,3 +24,10 @@ borg-import rsnapshot --------------------- .. generate-usage:: rsnapshot + +.. _borg: + +borg-import borg +---------------- + +.. generate-usage:: borg diff --git a/pyproject.toml b/pyproject.toml index b689a8b..fc5ff90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,9 +49,6 @@ write_to = "src/borg_import/_version.py" python_files = "testsuite/*.py" testpaths = ["src"] -[tool.pytest.ini_options] -addopts = "-rs --cov=borg_import --cov-config=pyproject.toml" - [tool.flake8] max-line-length = 120 exclude = "build,dist,.git,.idea,.cache,.tox,docs/conf.py,.eggs" @@ -59,7 +56,7 @@ exclude = "build,dist,.git,.idea,.cache,.tox,docs/conf.py,.eggs" [tool.coverage.run] branch = true source = ["src/borg_import"] -omit = ["*/borg_import/helpers/testsuite/*"] +omit = ["*/borg_import/helpers/testsuite/*", "*/borg_import/testsuite/*"] [tool.coverage.report] exclude_lines = [ @@ -77,15 +74,14 @@ env_list = ["py39", "py310", "py311", "py312", "py313", "flake8"] [tool.tox.env_run_base] package = "editable-legacy" +commands = [["pytest", "-v", "-rs", "--cov=borg_import", "--cov-config=pyproject.toml", "--pyargs", "{posargs:borg_import}"]] +deps = 
def get_borg_archives(repository):
    """Yield one metadata dict per archive listed in a Borg repository.

    ``borg list`` is invoked with a tab-separated ``name``/``time`` output
    format and its output is parsed line by line. Blank lines and lines
    without a tab separator are skipped.

    :param repository: repository spec passed verbatim to ``borg list``.
    :return: generator of dicts with the keys ``name``, ``timestamp``
        (the listed time parsed by ``datetime_from_string``) and
        ``original_repository`` (the ``repository`` argument).
    :raises subprocess.CalledProcessError: if ``borg list`` exits non-zero.
    """
    listing = subprocess.check_output(
        ['borg', 'list', '--format', '{name}{TAB}{time}{NL}', repository]
    ).decode()

    for entry in listing.splitlines():
        # Ignore whitespace-only lines in the listing.
        if not entry.strip():
            continue

        # Split at the first tab only: archive name first, timestamp after.
        name, tab, timestamp_str = entry.partition('\t')
        if not tab:
            continue

        yield {
            'name': name,
            'timestamp': datetime_from_string(timestamp_str),
            'original_repository': repository,
        }
class borgImporter(Importer):
    """Importer that re-creates the archives of one Borg repository in another.

    Each source archive is extracted into a scratch directory below the
    current working directory and then archived again into the destination
    repository under the same name (plus ``args.prefix``) and timestamp.
    """
    name = 'borg'
    description = 'import archives from another Borg repository'
    epilog = """
    Imports archives from an existing Borg repository into a new one.

    This is useful when a Borg repository needs to be rebuilt and all archives
    transferred from the old repository to a new one.

    The importer extracts each archive from the source repository to a intermediate
    directory inside the current work directory (make sure there is enough space!)
    and then creates a new archive with the same name and timestamp in the destination
    repository.

    Because the importer changes the current directory while importing archives,
    you need to give either absolute paths for the source and destination repositories
    or ssh:// URLs.

    To avoid issues with user/group id-to-name mappings, the importer will only
    transfer the numeric user and group ids for the files inside the archives.

    By default, archive names are preserved. Use --prefix to add a prefix to
    the imported archive names.
    """

    def populate_parser(self, parser):
        """Register the source/destination repository arguments and the handler."""
        parser.add_argument('source_repository', metavar='SOURCE_REPOSITORY',
                            help='Source Borg repository (must be a valid Borg repository spec)')
        parser.add_argument('repository', metavar='DESTINATION_REPOSITORY',
                            help='Destination Borg repository (must be a valid Borg repository spec)')
        parser.set_defaults(function=self.import_borg)

    def import_borg(self, args):
        """Copy every archive of ``args.source_repository`` into ``args.repository``.

        Archives whose (prefixed) name already exists in the destination are
        skipped. A ``CalledProcessError`` with return code 1 (Borg's warning
        status) is reported and the loop continues with the next archive; any
        other return code is re-raised. The scratch directory is emptied after
        every archive and removed when the method exits.
        """
        existing_archives = list_borg_archives(args)

        # Scratch directory inside the current working directory; the PID keeps
        # concurrently running imports from sharing one directory.
        import_path = Path.cwd() / f"borg_import_{os.getpid()}"
        import_path.mkdir(exist_ok=True)

        try:
            for archive in get_borg_archives(args.source_repository):
                name = archive['name']
                timestamp = archive['timestamp'].replace(microsecond=0)
                archive_name = args.prefix + name

                if archive_name in existing_archives:
                    print('Skipping (already exists in repository):', name)
                    continue

                print('Importing {} (timestamp {}) '.format(name, timestamp), end='')
                if archive_name != name:
                    print('as', archive_name)
                else:
                    print()

                try:
                    # Extract the archive from the source repository
                    extract_cmdline = ['borg', 'extract', '--numeric-ids']
                    extract_cmdline.append(args.source_repository + '::' + name)

                    print(' Extracting archive to import directory...')
                    subprocess.check_call(extract_cmdline, cwd=str(import_path))

                    # Create a new archive in the destination repository
                    borg_import(args, archive_name, str(import_path), timestamp=timestamp)
                except subprocess.CalledProcessError as cpe:
                    print('Error during import of {}: {}'.format(name, cpe))
                    if cpe.returncode != 1:  # Borg returns 1 for warnings
                        raise
                finally:
                    # Empty the directory after every archive, including the
                    # tolerated-warning path. Otherwise files left behind by a
                    # partially processed archive would be picked up when the
                    # next archive is extracted into the same directory.
                    print(' Cleaning import directory...')
                    shutil.rmtree(import_path)
                    import_path.mkdir(exist_ok=True)
        finally:
            # Clean up the import directory when done
            if import_path.exists():
                shutil.rmtree(import_path)
def test_borg_import(tmpdir, monkeypatch):
    """Round-trip two archives through the borg importer and check their contents.

    Two unencrypted repositories are created below ``tmpdir``. The source
    repository receives the archives ``archive1`` and ``archive2``, the
    importer is run via ``main()`` with a patched ``sys.argv``, and the
    archives found in the target repository are extracted and compared
    with the original file contents.
    """
    archive_names = ("archive1", "archive2")
    file_numbers = (1, 2)

    # Repository locations and the directory holding the input data
    source_repo = tmpdir.mkdir("source_repo")
    target_repo = tmpdir.mkdir("target_repo")
    test_data = tmpdir.mkdir("test_data")

    # Build one data directory per archive, each holding two small text files
    data_dirs = {}
    for archive_no, archive_name in enumerate(archive_names, start=1):
        data_dir = test_data.mkdir(archive_name)
        for file_no in file_numbers:
            data_dir.join(f"file{file_no}.txt").write(
                f"This is file {file_no} in archive {archive_no}"
            )
        data_dirs[archive_name] = data_dir

    # Source repository with one archive per data directory
    subprocess.check_call(["borg", "init", "--encryption=none", str(source_repo)])
    for archive_name in archive_names:
        subprocess.check_call(
            ["borg", "create", f"{source_repo}::{archive_name}", "."],
            cwd=str(data_dirs[archive_name]),
        )

    # Empty target repository
    subprocess.check_call(["borg", "init", "--encryption=none", str(target_repo)])

    # Run the importer as if invoked from the command line
    argv = ["borg-import", "borg", str(source_repo), str(target_repo)]
    monkeypatch.setattr("sys.argv", argv)
    main()

    # Both archives must now be listed in the target repository
    listing = subprocess.check_output(["borg", "list", "--short", str(target_repo)])
    archives = listing.decode().splitlines()
    for archive_name in archive_names:
        assert archive_name in archives

    # Extract each imported archive and compare its files with the originals
    for archive_no, archive_name in enumerate(archive_names, start=1):
        extract_dir = tmpdir.mkdir(f"extract{archive_no}")
        subprocess.check_call(
            ["borg", "extract", f"{target_repo}::{archive_name}"],
            cwd=str(extract_dir),
        )
        for file_no in file_numbers:
            expected = f"This is file {file_no} in archive {archive_no}"
            assert extract_dir.join(f"file{file_no}.txt").read() == expected