diff --git a/README.md b/README.md index 1f4438d..b9a1ff5 100644 --- a/README.md +++ b/README.md @@ -310,6 +310,13 @@ To drop all cells tagged with "solution" run: The option accepts a list of tags separated by whitespace. +### Forcing UNIX newlines + +Force UNIX (LF) newlines in the output (useful on Windows to keep consistent +line endings in filtered output or textconv diffs): + + nbstripout --unix-newlines FILE.ipynb + ### Keeping some output Do not strip the execution count/prompt number: diff --git a/nbstripout/_nbstripout.py b/nbstripout/_nbstripout.py index 41dfe4d..a241c3d 100644 --- a/nbstripout/_nbstripout.py +++ b/nbstripout/_nbstripout.py @@ -531,6 +531,12 @@ def main(): parser.add_argument('--textconv', '-t', action='store_true', help='Prints stripped files to STDOUT') + parser.add_argument( + '--unix-newlines', + action='store_true', + help='Force UNIX line endings in output (if unset, normalize to os.linesep)', + ) + parser.add_argument('files', nargs='*', help='Files to strip output from') args = parser.parse_args() git_config = ['git', 'config'] @@ -600,10 +606,14 @@ def main(): keep_metadata_keys.extend(args.keep_metadata_keys.split()) extra_keys = [i for i in extra_keys if i not in keep_metadata_keys] + # Newlines from the input file cannot be preserved: nbformat implicitly converts all newlines to \n when reading. + # With newline='' no newline translation happens on output, so it always uses \n; with newline=None, \n becomes os.linesep. 
+ newline = '' if args.unix_newlines else None + # Wrap input/output stream in UTF-8 encoded text wrapper # https://stackoverflow.com/a/16549381 input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') if sys.stdin else None - output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline='') + output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline=newline) process_notebook = {'jupyter': process_jupyter_notebook, 'zeppelin': process_zeppelin_notebook}[args.mode] any_change = False @@ -612,7 +622,7 @@ def main(): continue try: - with io.open(filename, 'r+', encoding='utf8') as f: + with io.open(filename, 'r+', encoding='utf8', newline=newline) as f: out = output_stream if args.textconv or args.dry_run else f if process_notebook( input_stream=f, output_stream=out, args=args, extra_keys=extra_keys, filename=filename diff --git a/tests/test_end_to_end.py b/tests/test_end_to_end.py index f0bfb83..321e64c 100644 --- a/tests/test_end_to_end.py +++ b/tests/test_end_to_end.py @@ -1,4 +1,5 @@ import os +import sys from pathlib import Path import re from subprocess import run, PIPE @@ -202,3 +203,31 @@ def test_nochange_notebook_unchanged(): zpln_mtime_after = zpln_file.stat().st_mtime_ns assert zpln_mtime_after == zpln_mtime_before + + +def test_newline_behavior(tmp_path: Path): + input_content = (NOTEBOOKS_FOLDER / 'test_drop_empty_cells.ipynb').read_bytes().replace(b'\n', b'\r\n') + + to_os_eol = tmp_path / 'should-have-os-eol.ipynb' + to_os_eol.write_bytes(input_content) + + run([nbstripout_exe(), to_os_eol]) + if sys.platform == 'win32': + assert b'\r\n' in to_os_eol.read_bytes() + else: + assert b'\r\n' not in to_os_eol.read_bytes() + + pc = run([nbstripout_exe(), '--textconv', to_os_eol], stdout=PIPE) + if sys.platform == 'win32': + assert b'\r\n' in pc.stdout + else: + assert b'\r\n' not in pc.stdout + + to_lf_eol = tmp_path / 'should-have-lf-eol.ipynb' + to_lf_eol.write_bytes(input_content) + + run([nbstripout_exe(), 
'--unix-newlines', to_lf_eol]) + assert b'\r\n' not in to_lf_eol.read_bytes() + + pc = run([nbstripout_exe(), '--unix-newlines', '--textconv', to_lf_eol], stdout=PIPE) + assert b'\r\n' not in pc.stdout