Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 176 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import streamlit as st
import numpy as np
import os
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

os is imported but unused in this file; remove it to avoid unused-import warnings.

Suggested change
import os

Copilot uses AI. Check for mistakes.
import tempfile
import matplotlib.pyplot as plt
from PIL import Image

# Import the new modules
from src.hg_dt.viz.tracks_plotter import plot_tracks
from src.hg_dt.viz.hic_plotter import plot_hic_triangle
from src.hg_dt.viz.protein_viz import render_protein_comparison
from src.hg_dt.analyze.attribution import generate_mechanistic_insight
Comment on lines +8 to +12
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like pipeline/app.py, this Streamlit entrypoint should be robust to being launched from a different working directory (e.g., streamlit run /path/to/repo/app.py). Right now there’s no sys.path bootstrap, so from src.hg_dt... imports can fail depending on CWD. Consider inserting the repo root into sys.path early (see pipeline/app.py:26-30 for an established pattern in this repo).

Copilot uses AI. Check for mistakes.

# Page-level configuration: this is the first Streamlit call in the script and
# selects the wide layout for the whole dashboard.
st.set_page_config(page_title="HG-DT Causal Dashboard", layout="wide")

# Main heading displayed above the tabs.
st.title("HG-DT: Visual Interpretation & Causal Dashboard")

# Mock data generation for demonstration
@st.cache_data
def generate_mock_data(mod_type: str, gene: str):
    """Build synthetic reference/mutant signals for the demo dashboard.

    Args:
        mod_type: Modification selected in the UI. Only ``"Deletion"`` alters
            the mutant data; for any other value the mutant arrays are plain
            copies of the reference arrays.
        gene: Target gene name. It does not influence the generated values;
            it is accepted so callers can pass it alongside ``mod_type``.

    Returns:
        A 6-tuple ``(ref_track, mut_track, ref_hic, mut_hic, ref_pdb, mut_pdb)``:
        two 100-bin 1D tracks, two symmetric 50x50 contact maps with a unit
        diagonal, and two PDB strings (both currently empty).
    """
    is_deletion = mod_type == "Deletion"

    # Tracks: 100 bins of noisy signal around 5.
    ref_track = np.random.normal(5, 1, 100)
    mut_track = ref_track.copy()
    if is_deletion:
        # Simulate signal loss in the middle bins.
        mut_track[40:60] *= 0.2

    # Contact maps: 50x50, symmetrised, with self-contacts pinned to 1.
    ref_hic = np.random.rand(50, 50)
    ref_hic = (ref_hic + ref_hic.T) / 2
    np.fill_diagonal(ref_hic, 1.0)

    mut_hic = ref_hic.copy()
    if is_deletion:
        # Disrupted loop: weaken one off-diagonal block...
        mut_hic[10:20, 30:40] *= 0.3
        # ...and mirror it across the diagonal. The mirrored block must be
        # the transpose, otherwise the contact map is no longer symmetric.
        mut_hic[30:40, 10:20] = mut_hic[10:20, 30:40].T

    # No real structures are available in the mock; both slots stay empty.
    mock_pdb = ""

    return ref_track, mut_track, ref_hic, mut_hic, mock_pdb, mock_pdb

@st.cache_data
def generate_accessibility_over_time(gene: str, mod_type: str, time_steps: int = 50):
    """Produce a mock accessibility time course for a structural transition.

    Args:
        gene: Target gene name; not used in the computation itself.
        mod_type: ``"Deletion"`` yields a sigmoidal drop, ``"Insertion"`` a
            sigmoidal rise, and anything else a flat baseline.
        time_steps: Number of samples taken on the interval [0, 10].

    Returns:
        ``(t, accessibility)``: the sample times and the noisy signal, both
        arrays of length ``time_steps``.
    """
    t = np.linspace(0, 10, time_steps)
    base_accessibility = 100.0

    # Logistic transition centred at t = 5, shared by both non-flat cases.
    transition = 1 / (1 + np.exp(-t + 5))

    if mod_type == "Deletion":
        trend = base_accessibility - 60 * transition
    elif mod_type == "Insertion":
        trend = base_accessibility + 80 * transition
    else:
        trend = np.full(time_steps, base_accessibility)

    # Gaussian measurement noise (sigma = 2) is drawn once for every case.
    noise = np.random.normal(0, 2, time_steps)
    return t, trend + noise

# Dashboard tabs, created in display order and unpacked into one handle each.
_tab_labels = [
    "Specification",
    "Genome Tracks",
    "3D Organization",
    "Protein Structure",
    "Trajectory Animation",
    "Mechanistic Attribution",
]
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(_tab_labels)

# Seed the session state with defaults for any key that is not present yet;
# keys that already hold a value are left untouched.
_session_defaults = {
    "mod_type": "None",
    "gene": "TAL1",
    "analyzed": False,
}
for _state_key, _state_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

with tab1:
    st.header("Input DNA Modification")

    # Gene and modification pickers sit side by side.
    _gene_choices = ["TAL1", "OCT4", "NANOG", "SOX2", "Mef2c"]
    _mod_choices = ["None", "Deletion", "Insertion", "Duplication"]
    col1, col2 = st.columns(2)
    with col1:
        gene = st.selectbox("Select Target Gene:", _gene_choices)
    with col2:
        mod_type = st.selectbox("Select Modification Type:", _mod_choices)

    # Free-text coordinates; they are only echoed back in the confirmation.
    chrom = st.text_input("Chromosome", "chr1")
    locus = st.text_input("Locus", "47200000-47250000")

    # The selection is committed to the session state only on button press.
    _run_clicked = st.button("Run Analysis")
    if _run_clicked:
        st.session_state["gene"] = gene
        st.session_state["mod_type"] = mod_type
        st.session_state["analyzed"] = True
        st.success(f"Analysis triggered for {gene} ({mod_type}) at {chrom}:{locus}")

# Result tabs are populated only after an analysis was triggered and the
# stored modification type is something other than "None". The mock datasets
# built here are shared by the tabs that follow.
if st.session_state.analyzed and st.session_state.mod_type != "None":
ref_track, mut_track, ref_hic, mut_hic, ref_pdb, mut_pdb = generate_mock_data(
st.session_state.mod_type, st.session_state.gene
)

with tab2:
    st.header("1D Genome Tracks (Ref vs. Mut)")
    st.write(f"Linear browser views for accessibility and expression deltas for **{st.session_state.gene}**.")
    # Render into a throwaway directory so no PNG is left behind after the
    # rerun. The image is copied into memory and the file handle is closed
    # before the directory (and the file in it) is removed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        track_img_path = plot_tracks(
            ref_track,
            mut_track,
            os.path.join(tmp_dir, "tracks.png"),
            title=f"1D Tracks: {st.session_state.gene} {st.session_state.mod_type}",
        )
        with Image.open(track_img_path) as track_file:
            track_img = track_file.copy()
    st.image(track_img, use_column_width=True)

Comment on lines +109 to +112
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Streamlit app writes plot images to NamedTemporaryFile(..., delete=False) but never removes them, which will leak temp files over time on repeated runs. Prefer rendering figures directly to memory (e.g., BytesIO) or explicitly os.unlink(...) after st.image; also ensure any Image.open(...) objects are closed to avoid file-handle leaks.

Copilot uses AI. Check for mistakes.
with tab3:
    st.header("3D Organization (Hi-C Heatmaps)")
    st.write("Triangular/Matrix contact map deltas + loop reorganization.")
    # Render into a throwaway directory so no PNG is left behind after the
    # rerun. The image is copied into memory and the file handle is closed
    # before the directory (and the file in it) is removed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        hic_img_path = plot_hic_triangle(
            ref_hic,
            mut_hic,
            os.path.join(tmp_dir, "hic.png"),
            title=f"3D Contact Map: {st.session_state.gene} {st.session_state.mod_type}",
        )
        with Image.open(hic_img_path) as hic_file:
            hic_img = hic_file.copy()
    st.image(hic_img, use_column_width=True)
Comment on lines +116 to +118
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same temp-file leak issue as above: this creates another delete=False image file and leaves it behind after rendering. Please clean up the temp file (or switch to in-memory rendering) to avoid accumulating files in the temp directory.

Copilot uses AI. Check for mistakes.

# Protein tab: side-by-side reference vs. mutant structure viewers.
with tab4:
st.header("Protein Structure")
st.write("View 3D folding (Ref vs. Mut).")
# ref_pdb / mut_pdb are the PDB strings returned by generate_mock_data for
# the current selection.
render_protein_comparison(ref_pdb, mut_pdb)

with tab5:
    st.header("Trajectory Animation")
    st.write(f"Molecular simulation trajectory: tracking **{st.session_state.gene}** accessibility over time as structure transitions from reference to mutant state.")

    t, accessibility = generate_accessibility_over_time(st.session_state.gene, st.session_state.mod_type)

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        ax.plot(t, accessibility, marker='o', linestyle='-', color='purple')
        ax.set_title(f"{st.session_state.gene} Accessibility Over Simulation Time")
        ax.set_xlabel("Simulation Step (Time)")
        ax.set_ylabel("Accessibility Signal")
        ax.grid(True, alpha=0.3)
        fig.tight_layout()

        # Save into a throwaway directory so no PNG accumulates across
        # reruns; the image is copied into memory before the directory goes.
        with tempfile.TemporaryDirectory() as tmp_dir:
            trajectory_path = os.path.join(tmp_dir, "trajectory.png")
            fig.savefig(trajectory_path, dpi=150)
            with Image.open(trajectory_path) as trajectory_file:
                trajectory_img = trajectory_file.copy()
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    st.image(trajectory_img, use_column_width=True)

Comment on lines +138 to +143
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same temp-file leak issue again for the trajectory plot: delete=False plus no cleanup will accumulate files across reruns. Consider using an in-memory buffer or deleting the temp file after st.image.

Copilot uses AI. Check for mistakes.
# Explanatory caption for the trajectory plot.
st.info("The plot above simulates the real-time changes in accessibility as the chromatin refolds post-modification.")

with tab6:
    st.header("Mechanistic Attribution")

    _selected_mod = st.session_state.mod_type
    _selected_gene = st.session_state.gene

    mod_details = {
        "type": _selected_mod.lower(),
        "target": f"enhancer element near {_selected_gene}",
    }

    # Hard-coded demo deltas per modification type; positive values are
    # drops, negative values are gains.
    if _selected_mod == "Deletion":
        _is_tal1 = _selected_gene == "TAL1"
        delta_stats = {
            "loop_weakened": True,
            "accessibility_drop": 0.28 if _is_tal1 else 0.45,
            "expression_drop": 0.35 if _is_tal1 else 0.50,
        }
    elif _selected_mod == "Insertion":
        delta_stats = {
            "loop_strengthened": True,
            "accessibility_drop": -0.50,
            "expression_drop": -0.80,
        }
    else:
        delta_stats = {
            "loop_weakened": False,
            "accessibility_drop": 0.0,
            "expression_drop": 0.0,
        }

    insight = generate_mechanistic_insight(mod_details, delta_stats)

    st.info("### Mechanistic Summary")
    st.success(insight)

# An analysis was triggered but the stored modification type is "None", so
# there is nothing to compare; point the user back to the first tab.
elif st.session_state.analyzed:
st.warning("Please select a valid modification from the Specification tab.")
38 changes: 38 additions & 0 deletions src/hg_dt/analyze/attribution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import Dict, Any

def generate_mechanistic_insight(mod_details: Dict[str, Any], delta_stats: Dict[str, Any]) -> str:
    """
    Generate a 'Mechanistic Insight' text string summarizing the multi-scale delta.

    Args:
        mod_details: Dictionary containing modification details. The keys
            'type' and 'target' are read; they default to "modification" and
            "element" when absent.
        delta_stats: Dictionary containing computed deltas, e.g.,
            {'accessibility_drop': 0.28, 'expression_drop': 0.35, 'loop_weakened': True}.
            Drops are fractions: positive means a decrease, negative a gain.
            Missing, ``None`` or zero deltas are left out of the summary.

    Returns:
        A human-readable string attributing the structural consequence, e.g.
        "This deletion affects enhancer X → weakens enhancer-promoter loop →
        accessibility ↓28% → expression ↓35%."
    """
    mod_type = mod_details.get("type", "modification")
    target = mod_details.get("target", "element")

    insight_parts = [f"This {mod_type} affects {target}"]

    # 'loop_weakened' takes precedence when both flags are set.
    if delta_stats.get("loop_weakened"):
        insight_parts.append("weakens enhancer-promoter loop")
    elif delta_stats.get("loop_strengthened"):
        insight_parts.append("strengthens enhancer-promoter loop")

    for label, key in (("accessibility", "accessibility_drop"), ("expression", "expression_drop")):
        part = _format_delta(label, delta_stats.get(key))
        if part:
            insight_parts.append(part)

    return " → ".join(insight_parts) + "."


def _format_delta(label: str, drop: Any) -> str:
    """Format one fractional drop as '<label> ↓N%' / '<label> ↑N%', or '' when there is no change."""
    drop = drop or 0  # a missing or None delta counts as "no change"
    if drop == 0:
        return ""
    arrow = "↓" if drop > 0 else "↑"
    # Round instead of truncating: 0.29 * 100 is 28.999999999999996 in
    # floating point and int() would report it as 28%.
    percent = int(round(abs(float(drop)) * 100))
    return f"{label} {arrow}{percent}%"
44 changes: 44 additions & 0 deletions src/hg_dt/viz/hic_plotter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import os
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
Comment on lines +1 to +4
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

os and LinearSegmentedColormap are imported but unused in this file; please remove them to keep the module clean and avoid unused-import warnings.

Suggested change
import os
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt
import numpy as np

Copilot uses AI. Check for mistakes.

def plot_hic_triangle(ref_map: np.ndarray, mut_map: np.ndarray, output_path: str, title: str = "3D Chromatin Contact Map (Ref vs Mut)"):
    """
    Generate upper-triangle Hi-C heatmaps for Ref, Mut and their delta.

    Three panels are drawn side by side (reference, mutant, mutant minus
    reference) and the figure is written to ``output_path``.

    Args:
        ref_map: 2D numpy array for reference contact frequencies.
        mut_map: 2D numpy array for mutant contact frequencies; must have the
            same shape as ``ref_map``.
        output_path: Path to save the resulting image.
        title: Title of the plot.

    Returns:
        ``output_path``, unchanged, for caller convenience.

    Raises:
        ValueError: If either map is not 2D or the two shapes differ.
    """
    ref_map = np.asarray(ref_map)
    mut_map = np.asarray(mut_map)
    if ref_map.ndim != 2 or mut_map.ndim != 2:
        raise ValueError(
            f"ref_map and mut_map must be 2D arrays, got {ref_map.ndim}D and {mut_map.ndim}D"
        )
    if ref_map.shape != mut_map.shape:
        raise ValueError(
            f"ref_map and mut_map must have the same shape, got {ref_map.shape} and {mut_map.shape}"
        )

    # The matrix is not rotated; to keep it simple only the upper triangle
    # of each map is kept and plotted as a regular 2D image.
    ref_tri = np.triu(ref_map)
    mut_tri = np.triu(mut_map)
    delta_tri = mut_tri - ref_tri

    cmap_hic = "Reds"
    cmap_delta = "coolwarm"

    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    try:
        for ax, data, panel_title in ((axes[0], ref_tri, "Reference"), (axes[1], mut_tri, "Mutant")):
            im = ax.imshow(data, cmap=cmap_hic, interpolation='nearest')
            ax.set_title(panel_title)
            fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)

        # Symmetric colour limits centre the diverging colormap on zero.
        # When the maps are identical the range collapses to [0, 0], which
        # is not a valid colour scale, so matplotlib's defaults are used.
        vmax = float(np.max(np.abs(delta_tri))) if delta_tri.size else 0.0
        limits = {"vmin": -vmax, "vmax": vmax} if vmax > 0 else {}
        im_delta = axes[2].imshow(delta_tri, cmap=cmap_delta, interpolation='nearest', **limits)
        axes[2].set_title("Delta (Mut - Ref)")
        fig.colorbar(im_delta, ax=axes[2], fraction=0.046, pad=0.04)

        fig.suptitle(title)
        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        # Close this specific figure even if drawing or saving failed.
        plt.close(fig)
    return output_path
38 changes: 38 additions & 0 deletions src/hg_dt/viz/protein_viz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import py3Dmol
import streamlit as st
from stmol import showmol
Comment on lines +1 to +3
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This module imports py3Dmol and stmol at import time. These are not in the repo's conda environment.yml, and (more importantly) this will crash the whole Streamlit app on startup if the optional 3D viewer deps aren’t installed, even if the user never opens the Protein tab. Consider lazy-importing inside show_protein_3d/render_protein_comparison and surfacing a clear Streamlit message when the deps are missing (similar to the lazy optional-dependency pattern in src/enhancer_id.py).

Copilot uses AI. Check for mistakes.

def show_protein_3d(pdb_data: str, width: int = 800, height: int = 400):
    """
    Display a protein structure in Streamlit through a py3Dmol viewer.

    Args:
        pdb_data: PDB-format text of the structure to show.
        width: Viewer width, passed to both py3Dmol and stmol.
        height: Viewer height, passed to both py3Dmol and stmol.
    """
    viewer = py3Dmol.view(width=width, height=height)
    viewer.addModel(pdb_data, "pdb")

    # Cartoon representation coloured with the 'spectrum' scheme.
    cartoon_style = {'cartoon': {'color': 'spectrum'}}
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()

    showmol(viewer, height=height, width=width)

def render_protein_comparison(ref_pdb: str, mut_pdb: str):
    """
    Show the reference and mutant structures next to each other in Streamlit.

    Each structure gets its own column with a subheader. An empty PDB string
    produces an informational placeholder instead of a viewer.

    Args:
        ref_pdb: PDB text for the reference structure (may be empty).
        mut_pdb: PDB text for the mutant structure (may be empty).
    """
    left_col, right_col = st.columns(2)

    panels = (
        (left_col, "Reference Structure", ref_pdb, "Reference PDB not provided."),
        (right_col, "Mutant Structure", mut_pdb, "Mutant PDB not provided."),
    )
    for column, heading, pdb_text, missing_note in panels:
        with column:
            st.subheader(heading)
            if not pdb_text:
                st.info(missing_note)
                continue
            show_protein_3d(pdb_text, width=350, height=350)
39 changes: 39 additions & 0 deletions src/hg_dt/viz/tracks_plotter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os
Copy link

Copilot AI Apr 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

os is imported but never used in this module; consider removing it to avoid lint noise and keep dependencies minimal.

Suggested change
import os

Copilot uses AI. Check for mistakes.
import matplotlib.pyplot as plt
import numpy as np

def plot_tracks(ref_track: np.ndarray, mut_track: np.ndarray, output_path: str, title: str = "1D Genome Tracks (Ref vs. Mut)"):
    """
    Generate a 1D browser plot comparing reference and mutant tracks.
    Uses matplotlib as a lightweight alternative to a full pyGenomeTracks setup
    for generating the 1D delta view (accessibility / expression).

    Three stacked panels share the x axis: reference, mutant, and the delta
    (mutant minus reference) with gains in red and losses in green.

    Args:
        ref_track: 1D numpy array of reference signal.
        mut_track: 1D numpy array of mutant signal; must have the same length
            as ``ref_track``.
        output_path: Path to save the resulting image.
        title: Title of the plot.

    Returns:
        ``output_path``, unchanged, for caller convenience.

    Raises:
        ValueError: If either track is not 1D or the two lengths differ.
    """
    ref_track = np.asarray(ref_track)
    mut_track = np.asarray(mut_track)
    if ref_track.ndim != 1 or mut_track.ndim != 1:
        raise ValueError(
            f"ref_track and mut_track must be 1D arrays, got {ref_track.ndim}D and {mut_track.ndim}D"
        )
    if ref_track.shape != mut_track.shape:
        # Checked explicitly: a length-1 track would otherwise broadcast
        # silently and produce a misleading delta.
        raise ValueError(
            f"ref_track and mut_track must have the same length, got {ref_track.size} and {mut_track.size}"
        )

    positions = np.arange(ref_track.size)
    delta = mut_track - ref_track

    fig, axes = plt.subplots(3, 1, figsize=(10, 6), sharex=True)
    try:
        # Ref track
        axes[0].fill_between(positions, ref_track, color="blue", alpha=0.7)
        axes[0].set_ylabel("Reference")
        axes[0].set_title(title)

        # Mut track
        axes[1].fill_between(positions, mut_track, color="orange", alpha=0.7)
        axes[1].set_ylabel("Mutant")

        # Delta track, split by sign so gains and losses get distinct colours.
        axes[2].fill_between(positions, delta, where=(delta >= 0), color="red", alpha=0.7, label="Gain")
        axes[2].fill_between(positions, delta, where=(delta < 0), color="green", alpha=0.7, label="Loss")
        axes[2].set_ylabel("Delta (Mut - Ref)")
        axes[2].set_xlabel("Genomic Position (bins)")
        axes[2].legend()

        fig.tight_layout()
        fig.savefig(output_path, dpi=150)
    finally:
        # Close this specific figure even if drawing or saving failed.
        plt.close(fig)
    return output_path