Skip to content

Commit 0c5e46f

Browse files
committed
Show metadata size
Fixes #2637
1 parent 28c9e48 commit 0c5e46f

File tree

4 files changed

+44
-13
lines changed

4 files changed

+44
-13
lines changed

python/CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
ID is e.g. a population name, rather than silently returning no samples.
99
(:user:`hyanwong`, :pr:`3344`)
1010

11+
**Features**
12+
13+
- Displaying a summary of the tree sequence now shows the metadata codec and
14+
size of the metadata for each table. (:user:`hyanwong`, :pr:`3343`, :issue:`2637`)
15+
1116
--------------------
1217
[1.0.0] - 2025-11-27
1318
--------------------

python/tests/test_highlevel.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,8 +1991,24 @@ def test_html_repr(self, ts):
19911991
assert len(html) > 5000
19921992
assert f"<tr><td>Trees</td><td>{ts.num_trees:,}</td></tr>" in html
19931993
assert f"<tr><td>Time Units</td><td>{ts.time_units}</td></tr>" in html
1994-
for table in ts.tables.table_name_map:
1995-
assert f"<td>{table.capitalize()}</td>" in html
1994+
codecs = collections.defaultdict(int)
1995+
for table_name, table in ts.tables.table_name_map.items():
1996+
assert f"<td>{table_name.capitalize()}</td>" in html
1997+
if hasattr(table, "metadata_schema"):
1998+
schema = table.metadata_schema.schema
1999+
codec = schema["codec"] if schema else "raw"
2000+
codecs[codec] += 1
2001+
assert "<td>Metadata</td>" in html
2002+
assert "<td>Metadata size</td>" in html
2003+
num_tables_with_metadata = 0
2004+
for codec, count in codecs.items():
2005+
assert html.count(f">{codec}</td>") == count
2006+
num_tables_with_metadata += count
2007+
# Only one table (provenances) has no metadata
2008+
assert num_tables_with_metadata == len(ts.tables.table_name_map) - 1
2009+
# All metadata tables should show the percentage metadata size
2010+
assert html.count("%)</td>") == num_tables_with_metadata
2011+
19962012
if ts.num_provenances > 0:
19972013
assert (
19982014
f"<td>{json.loads(ts.provenance(0).record)['software']['name']}</td>"

python/tskit/trees.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4476,19 +4476,16 @@ def __str__(self):
44764476
["Sample Nodes", util.format_number(self.num_samples, sep=",")],
44774477
["Total Size", util.naturalsize(self.nbytes)],
44784478
]
4479-
header = ["Table", "Rows", "Size", "Has Metadata"]
4479+
header = ["Table", "Rows", "Size", "Metadata", "Metadata size"]
44804480
table_rows = []
44814481
for name, table in self.tables.table_name_map.items():
44824482
table_rows.append(
44834483
[
44844484
name.capitalize(),
44854485
f"{util.format_number(table.num_rows, sep=',')}",
44864486
util.naturalsize(table.nbytes),
4487-
(
4488-
"Yes"
4489-
if hasattr(table, "metadata") and len(table.metadata) > 0
4490-
else "No"
4491-
),
4487+
util.metadata_codec(table),
4488+
util.metadata_size(table),
44924489
]
44934490
)
44944491
return util.unicode_table(ts_rows, title="TreeSequence") + util.unicode_table(

python/tskit/util.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -534,17 +534,29 @@ def html_table(rows, *, header):
534534
"""
535535

536536

537+
def metadata_codec(table):
    """
    Return the name of the metadata codec to display for ``table``.

    The result is the empty string when the table has no
    ``metadata_schema`` attribute, ``"raw"`` when the schema is ``None``,
    and otherwise the schema's ``"codec"`` entry, falling back to
    ``"unknown"`` if that key is absent.
    """
    if not hasattr(table, "metadata_schema"):
        return ""
    schema_dict = table.metadata_schema.schema
    if schema_dict is None:
        return "raw"
    return schema_dict.get("codec", "unknown")
542+
543+
544+
def metadata_size(table):
    """
    Return a human-readable summary of how much of ``table`` is metadata.

    The result has the form ``"<size> (<percent>%)"``, where ``<size>`` is
    the length of ``table.metadata`` formatted by ``naturalsize`` and
    ``<percent>`` is that length as a fraction of ``table.nbytes``, rounded
    to a whole percentage. The empty string is returned when the table has
    no ``metadata`` attribute.
    """
    if not hasattr(table, "metadata"):
        return ""
    # Read and measure the metadata once instead of once per use.
    metadata_bytes = len(table.metadata)
    total_bytes = table.nbytes
    # Guard the division so a table reporting zero bytes shows 0% rather
    # than raising ZeroDivisionError while rendering a summary.
    frac = metadata_bytes / total_bytes if total_bytes > 0 else 0.0
    return f"{naturalsize(metadata_bytes)} ({frac:.0%})"
549+
550+
537551
def tree_sequence_html(ts):
538552
table_rows = "".join(
539553
f"""
540554
<tr>
541555
<td>{name.capitalize()}</td>
542556
<td>{format_number(table.num_rows)}</td>
543557
<td>{naturalsize(table.nbytes)}</td>
544-
<td style="text-align: center;">
545-
{'✅' if hasattr(table, "metadata") and len(table.metadata) > 0
546-
else ''}
547-
</td>
558+
<td style="text-align: center;">{metadata_codec(table)}</td>
559+
<td>{metadata_size(table)}</td>
548560
</tr>
549561
"""
550562
for name, table in ts.tables.table_name_map.items()
@@ -637,7 +649,8 @@ def tree_sequence_html(ts):
637649
<th style="line-height:21px;">Table</th>
638650
<th>Rows</th>
639651
<th>Size</th>
640-
<th>Has Metadata</th>
652+
<th>Metadata</th>
653+
<th>Metadata size</th>
641654
</tr>
642655
</thead>
643656
<tbody>

0 commit comments

Comments
 (0)