Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,13 @@ To drop all cells tagged with "solution" run:

The option accepts a list of tags separated by whitespace.

### Forcing UNIX newlines

Force UNIX (LF) newlines in the output (useful on Windows to keep consistent
line endings in filtered output or textconv diffs):

nbstripout --unix-newlines FILE.ipynb

### Keeping some output

Do not strip the execution count/prompt number:
Expand Down
14 changes: 12 additions & 2 deletions nbstripout/_nbstripout.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,12 @@ def main():

parser.add_argument('--textconv', '-t', action='store_true', help='Prints stripped files to STDOUT')

parser.add_argument(
'--unix-newlines',
action='store_true',
help='Force UNIX line endings in output (if unset, normalize to os.linesep)',
)

parser.add_argument('files', nargs='*', help='Files to strip output from')
args = parser.parse_args()
git_config = ['git', 'config']
Expand Down Expand Up @@ -600,10 +606,14 @@ def main():
keep_metadata_keys.extend(args.keep_metadata_keys.split())
extra_keys = [i for i in extra_keys if i not in keep_metadata_keys]

# Note that we can't actually preserve newlines from the input file: nbformat implicitly converts all newlines to \n.
# With newline='' Python performs no newline translation on output, so the output always uses \n as newlines;
# with newline=None (the default) each \n written is translated to os.linesep instead.
newline = '' if args.unix_newlines else None

# Wrap input/output stream in UTF-8 encoded text wrapper
# https://stackoverflow.com/a/16549381
input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8') if sys.stdin else None
output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline='')
output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline=newline)

process_notebook = {'jupyter': process_jupyter_notebook, 'zeppelin': process_zeppelin_notebook}[args.mode]
any_change = False
Expand All @@ -612,7 +622,7 @@ def main():
continue

try:
with io.open(filename, 'r+', encoding='utf8') as f:
with io.open(filename, 'r+', encoding='utf8', newline=newline) as f:
out = output_stream if args.textconv or args.dry_run else f
if process_notebook(
input_stream=f, output_stream=out, args=args, extra_keys=extra_keys, filename=filename
Expand Down
29 changes: 29 additions & 0 deletions tests/test_end_to_end.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import sys
from pathlib import Path
import re
from subprocess import run, PIPE
Expand Down Expand Up @@ -202,3 +203,31 @@ def test_nochange_notebook_unchanged():
zpln_mtime_after = zpln_file.stat().st_mtime_ns

assert zpln_mtime_after == zpln_mtime_before


def test_newline_behavior(tmp_path: Path):
    """Check which newline style nbstripout emits with and without --unix-newlines.

    Without the flag, output is expected to use the platform newline (CRLF on
    Windows, LF elsewhere). With ``--unix-newlines`` it must be LF everywhere.
    Both in-place rewriting and ``--textconv`` output to stdout are checked.
    """
    crlf = b'\r\n'
    expect_crlf = sys.platform == 'win32'

    # Normalise to LF first so a fixture checked out with CRLF line endings
    # (e.g. git autocrlf on Windows) does not turn into '\r\r\n'.
    lf_content = (NOTEBOOKS_FOLDER / 'test_drop_empty_cells.ipynb').read_bytes().replace(crlf, b'\n')
    input_content = lf_content.replace(b'\n', crlf)

    to_os_eol = tmp_path / 'should-have-os-eol.ipynb'
    to_os_eol.write_bytes(input_content)

    # check=True: a crashing nbstripout must fail the test instead of leaving
    # the untouched CRLF input (or empty stdout) to satisfy the assertions.
    run([nbstripout_exe(), to_os_eol], check=True)
    assert (crlf in to_os_eol.read_bytes()) == expect_crlf

    pc = run([nbstripout_exe(), '--textconv', to_os_eol], stdout=PIPE, check=True)
    # An empty stdout would make a "no CRLF" check pass vacuously.
    assert pc.stdout
    assert (crlf in pc.stdout) == expect_crlf

    to_lf_eol = tmp_path / 'should-have-lf-eol.ipynb'
    to_lf_eol.write_bytes(input_content)

    run([nbstripout_exe(), '--unix-newlines', to_lf_eol], check=True)
    assert crlf not in to_lf_eol.read_bytes()

    pc = run([nbstripout_exe(), '--unix-newlines', '--textconv', to_lf_eol], stdout=PIPE, check=True)
    assert pc.stdout
    assert crlf not in pc.stdout