-
Notifications
You must be signed in to change notification settings - Fork 1
Implement HG-DT Visual Interpretation & Causal Dashboard #8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feat/dna-to-protein-pipeline-13697527110041319258
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,176 @@ | ||
| import streamlit as st | ||
| import numpy as np | ||
| import os | ||
| import tempfile | ||
| import matplotlib.pyplot as plt | ||
| from PIL import Image | ||
|
|
||
| # Import the new modules | ||
| from src.hg_dt.viz.tracks_plotter import plot_tracks | ||
| from src.hg_dt.viz.hic_plotter import plot_hic_triangle | ||
| from src.hg_dt.viz.protein_viz import render_protein_comparison | ||
| from src.hg_dt.analyze.attribution import generate_mechanistic_insight | ||
|
Comment on lines
+8
to
+12
|
||
|
|
||
| st.set_page_config(page_title="HG-DT Causal Dashboard", layout="wide") | ||
|
|
||
| st.title("HG-DT: Visual Interpretation & Causal Dashboard") | ||
|
|
||
| # Mock data generation for demonstration | ||
| @st.cache_data | ||
def generate_mock_data(mod_type: str, gene: str):
    """Build random reference/mutant demo data for the dashboard.

    Args:
        mod_type: Modification label. Only ``"Deletion"`` perturbs the mutant
            arrays; for any other value the mutant data is a copy of the
            reference data.
        gene: Target gene label. It does not influence the generated values.

    Returns:
        A 6-tuple ``(ref_track, mut_track, ref_hic, mut_hic, ref_pdb, mut_pdb)``:
        two 1D arrays of 100 bins, two symmetric 50x50 contact maps with a
        unit diagonal, and two empty PDB strings (no mock structure exists).
    """
    is_deletion = mod_type == "Deletion"

    # Tracks: 100 bins
    ref_track = np.random.normal(5, 1, 100)
    mut_track = ref_track.copy()
    if is_deletion:
        # Simulate loss in middle bins
        mut_track[40:60] *= 0.2

    # Contact maps: 50x50, symmetrised, with self-contacts pinned to 1.0
    ref_hic = np.random.rand(50, 50)
    ref_hic = (ref_hic + ref_hic.T) / 2
    np.fill_diagonal(ref_hic, 1.0)

    mut_hic = ref_hic.copy()
    if is_deletion:
        # Disrupted loop: attenuate one off-diagonal block...
        mut_hic[10:20, 30:40] *= 0.3
        # ...and mirror it across the diagonal. The mirror of block (i, j) is
        # the transpose of that block; copying it untransposed would leave the
        # mutant map asymmetric.
        mut_hic[30:40, 10:20] = mut_hic[10:20, 30:40].T

    mock_pdb = ""

    return ref_track, mut_track, ref_hic, mut_hic, mock_pdb, mock_pdb
|
|
||
| @st.cache_data | ||
def generate_accessibility_over_time(gene: str, mod_type: str, time_steps: int = 50):
    """Simulate a noisy accessibility signal along a structural transition.

    Args:
        gene: Target gene label; it does not influence the generated values.
        mod_type: ``"Deletion"`` yields a sigmoidal drop of up to 60,
            ``"Insertion"`` a sigmoidal rise of up to 80, and anything else a
            flat signal. Gaussian noise (sigma 2) is added in every case.
        time_steps: Number of samples taken over the interval 0..10.

    Returns:
        ``(t, accessibility)`` - the time axis and the signal, both of length
        ``time_steps``, around a baseline of 100.
    """
    baseline = 100.0
    timeline = np.linspace(0, 10, time_steps)
    noise = np.random.normal(0, 2, time_steps)

    # Signed amplitude of the sigmoidal shift for each modification type.
    shift_scale = {"Deletion": -60, "Insertion": 80}.get(mod_type)
    if shift_scale is None:
        return timeline, np.full(time_steps, baseline) + noise

    # Logistic curve centred on t = 5.
    sigmoid = 1 / (1 + np.exp(-timeline + 5))
    return timeline, baseline + shift_scale * sigmoid + noise
|
|
||
def _display_png(write_png, filename):
    """Render a PNG into a throwaway directory and show it in the current container.

    The image is written inside a ``TemporaryDirectory`` so nothing is left on
    disk after the script run, and the output path is never held open while
    the writer runs.

    Args:
        write_png: Callable that receives the output path and writes the PNG.
            It may return the path it actually wrote; a falsy return means
            "the path that was passed in".
        filename: File name to use inside the temporary directory.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        requested_path = os.path.join(tmp_dir, filename)
        written_path = write_png(requested_path) or requested_path
        # Keep the image open only while Streamlit consumes it, so the
        # directory can be removed cleanly afterwards.
        with Image.open(written_path) as img:
            st.image(img, use_column_width=True)


# Tabs definition
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
    "Specification",
    "Genome Tracks",
    "3D Organization",
    "Protein Structure",
    "Trajectory Animation",
    "Mechanistic Attribution"
])

# Seed session state so the result tabs always have values to read.
if "mod_type" not in st.session_state:
    st.session_state.mod_type = "None"
if "gene" not in st.session_state:
    st.session_state.gene = "TAL1"
if "analyzed" not in st.session_state:
    st.session_state.analyzed = False

with tab1:
    st.header("Input DNA Modification")

    col1, col2 = st.columns(2)
    with col1:
        gene = st.selectbox(
            "Select Target Gene:",
            ["TAL1", "OCT4", "NANOG", "SOX2", "Mef2c"]
        )
    with col2:
        mod_type = st.selectbox(
            "Select Modification Type:",
            ["None", "Deletion", "Insertion", "Duplication"]
        )

    chrom = st.text_input("Chromosome", "chr1")
    locus = st.text_input("Locus", "47200000-47250000")

    # Widget values are committed to session state only on an explicit run.
    if st.button("Run Analysis"):
        st.session_state.gene = gene
        st.session_state.mod_type = mod_type
        st.session_state.analyzed = True
        st.success(f"Analysis triggered for {gene} ({mod_type}) at {chrom}:{locus}")

if st.session_state.analyzed and st.session_state.mod_type != "None":
    ref_track, mut_track, ref_hic, mut_hic, ref_pdb, mut_pdb = generate_mock_data(
        st.session_state.mod_type, st.session_state.gene
    )

    with tab2:
        st.header("1D Genome Tracks (Ref vs. Mut)")
        st.write(f"Linear browser views for accessibility and expression deltas for **{st.session_state.gene}**.")
        _display_png(
            lambda path: plot_tracks(
                ref_track,
                mut_track,
                path,
                title=f"1D Tracks: {st.session_state.gene} {st.session_state.mod_type}",
            ),
            "tracks.png",
        )

    with tab3:
        st.header("3D Organization (Hi-C Heatmaps)")
        st.write("Triangular/Matrix contact map deltas + loop reorganization.")
        _display_png(
            lambda path: plot_hic_triangle(
                ref_hic,
                mut_hic,
                path,
                title=f"3D Contact Map: {st.session_state.gene} {st.session_state.mod_type}",
            ),
            "hic.png",
        )

    with tab4:
        st.header("Protein Structure")
        st.write("View 3D folding (Ref vs. Mut).")
        render_protein_comparison(ref_pdb, mut_pdb)

    with tab5:
        st.header("Trajectory Animation")
        st.write(f"Molecular simulation trajectory: tracking **{st.session_state.gene}** accessibility over time as structure transitions from reference to mutant state.")

        t, accessibility = generate_accessibility_over_time(st.session_state.gene, st.session_state.mod_type)

        fig, ax = plt.subplots(figsize=(10, 4))
        ax.plot(t, accessibility, marker='o', linestyle='-', color='purple')
        ax.set_title(f"{st.session_state.gene} Accessibility Over Simulation Time")
        ax.set_xlabel("Simulation Step (Time)")
        ax.set_ylabel("Accessibility Signal")
        ax.grid(True, alpha=0.3)

        fig.tight_layout()
        try:
            _display_png(lambda path: fig.savefig(path, dpi=150), "trajectory.png")
        finally:
            # Always release the figure, even if saving or display fails.
            plt.close(fig)

        st.info("The plot above simulates the real-time changes in accessibility as the chromatin refolds post-modification.")

    with tab6:
        st.header("Mechanistic Attribution")

        mod_details = {"type": st.session_state.mod_type.lower(), "target": f"enhancer element near {st.session_state.gene}"}

        if st.session_state.mod_type == "Deletion":
            delta_stats = {
                "loop_weakened": True,
                "accessibility_drop": 0.28 if st.session_state.gene == "TAL1" else 0.45,
                "expression_drop": 0.35 if st.session_state.gene == "TAL1" else 0.50
            }
        elif st.session_state.mod_type == "Insertion":
            delta_stats = {
                "loop_strengthened": True,
                "accessibility_drop": -0.50,  # Negative drop = gain
                "expression_drop": -0.80
            }
        else:
            delta_stats = {
                "loop_weakened": False,
                "accessibility_drop": 0.0,
                "expression_drop": 0.0
            }

        insight = generate_mechanistic_insight(mod_details, delta_stats)

        st.info("### Mechanistic Summary")
        st.success(insight)

elif st.session_state.analyzed:
    st.warning("Please select a valid modification from the Specification tab.")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| from typing import Dict, Any | ||
|
|
||
def _describe_change(label, drop):
    """Format a signed fractional drop as '<label> ↓N%' or '<label> ↑N%'.

    Returns ``None`` when ``drop`` is missing or zero, so the caller can skip it.
    """
    if drop is None:
        return None
    if drop > 0:
        arrow = "↓"
    elif drop < 0:
        arrow = "↑"
    else:
        return None
    # round(), not int(): 0.29 * 100 is 28.999999999999996 in binary floating
    # point and truncation would report 28%.
    return f"{label} {arrow}{round(abs(drop) * 100)}%"


def generate_mechanistic_insight(mod_details: Dict[str, Any], delta_stats: Dict[str, Any]) -> str:
    """
    Generate a 'Mechanistic Insight' text string summarizing the multi-scale delta.

    Args:
        mod_details: Dictionary containing modification details. The keys
            'type' (default "modification") and 'target' (default "element")
            are read.
        delta_stats: Dictionary containing computed deltas, e.g.,
            {'accessibility_drop': 0.28, 'expression_drop': 0.35, 'loop_weakened': True}.
            Drops are fractions; a negative drop is reported as a gain.
            'loop_weakened' takes precedence over 'loop_strengthened'.
            Missing, ``None`` or zero deltas are omitted from the summary.

    Returns:
        A human-readable string attributing the structural consequence, with
        the individual effects joined by " → " and terminated by a period.
    """
    mod_type = mod_details.get("type", "modification")
    target = mod_details.get("target", "element")

    insight_parts = [f"This {mod_type} affects {target}"]

    if delta_stats.get("loop_weakened"):
        insight_parts.append("weakens enhancer-promoter loop")
    elif delta_stats.get("loop_strengthened"):
        insight_parts.append("strengthens enhancer-promoter loop")

    for label, key in (("accessibility", "accessibility_drop"), ("expression", "expression_drop")):
        described = _describe_change(label, delta_stats.get(key, 0))
        if described is not None:
            insight_parts.append(described)

    # Example output: "This deletion affects ccRE EH38E1800647 → weakens enhancer-promoter loop → accessibility ↓28% → expression ↓35%."
    return " → ".join(insight_parts) + "."
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,44 @@ | ||||||||||||||
| import os | ||||||||||||||
| import matplotlib.pyplot as plt | ||||||||||||||
| import numpy as np | ||||||||||||||
| from matplotlib.colors import LinearSegmentedColormap | ||||||||||||||
|
Comment on lines
+1
to
+4
|
||||||||||||||
| import os | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| from matplotlib.colors import LinearSegmentedColormap | |
| import matplotlib.pyplot as plt | |
| import numpy as np |
Copilot
AI
Apr 5, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
np.triu(ref_map)/np.triu(mut_map) and the subsequent subtraction assume both maps are 2D and the same shape (and typically square for Hi-C). Add input validation (ndim == 2, same shape, optionally square) and raise a clear error if not.
Copilot
AI
Apr 5, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
vmax = np.max(np.abs(delta_tri)) can be 0 when the maps are identical, which makes vmin=-vmax, vmax=vmax invalid (matplotlib warns about identical limits and the colormap scaling becomes meaningless). Guard for vmax == 0 (e.g., skip setting vmin/vmax or use a small epsilon).
| im2 = axes[2].imshow(delta_tri, cmap=cmap_delta, vmin=-vmax, vmax=vmax, interpolation='nearest') | |
| if vmax > 0: | |
| im2 = axes[2].imshow(delta_tri, cmap=cmap_delta, vmin=-vmax, vmax=vmax, interpolation='nearest') | |
| else: | |
| im2 = axes[2].imshow(delta_tri, cmap=cmap_delta, interpolation='nearest') |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| import py3Dmol | ||
| import streamlit as st | ||
| from stmol import showmol | ||
|
Comment on lines
+1
to
+3
|
||
|
|
||
def show_protein_3d(pdb_data: str, width: int = 800, height: int = 400):
    """
    Display a protein structure as an interactive py3Dmol viewer in Streamlit.

    Args:
        pdb_data: PDB-format text of the structure to load.
        width: Viewer width, passed to both py3Dmol and the Streamlit embed.
        height: Viewer height, passed to both py3Dmol and the Streamlit embed.
    """
    # Cartoon representation coloured with the 'spectrum' scheme.
    cartoon_style = {'cartoon': {'color': 'spectrum'}}

    viewer = py3Dmol.view(width=width, height=height)
    viewer.addModel(pdb_data, "pdb")
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()

    showmol(viewer, height=height, width=width)
|
|
||
def render_protein_comparison(ref_pdb: str, mut_pdb: str):
    """
    Show the reference and mutant structures next to each other in Streamlit.

    Each structure gets its own column with a subheading. A falsy PDB string
    is replaced by an informational placeholder instead of a viewer.
    """
    panels = (
        ("Reference Structure", ref_pdb, "Reference PDB not provided."),
        ("Mutant Structure", mut_pdb, "Mutant PDB not provided."),
    )

    for column, (heading, pdb_text, missing_note) in zip(st.columns(2), panels):
        with column:
            st.subheader(heading)
            if not pdb_text:
                st.info(missing_note)
                continue
            show_protein_3d(pdb_text, width=350, height=350)
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -0,0 +1,39 @@ | ||||
| import os | ||||
|
||||
| import os |
Copilot
AI
Apr 5, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
delta = mut_track - ref_track assumes both inputs are 1D arrays of the same length; if shapes differ this will raise a broadcasting error (or silently misbehave if one is length-1). Add explicit validation (ndim == 1, same length) and raise a clear ValueError early.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`os` is imported but unused in this file; remove it to avoid unused-import warnings.