-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathapp.py
More file actions
1698 lines (1398 loc) · 60.6 KB
/
app.py
File metadata and controls
1698 lines (1398 loc) · 60.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
"""
Depth Surge 3D Web UI
Flask application for converting 2D videos to immersive 3D VR format
"""
from __future__ import annotations
import os
import time
import uuid
import subprocess
import platform
import argparse
import sys
import signal
import base64
from datetime import datetime
from pathlib import Path
from typing import Any
import cv2
import numpy as np
# Set PyTorch memory allocator config BEFORE importing torch
# This helps prevent memory fragmentation on GPU
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# Suppress warnings from dependencies
import warnings # noqa: E402
warnings.filterwarnings("ignore", category=SyntaxWarning) # moviepy old regex patterns
# Import our constants and utilities
from src.depth_surge_3d.core.constants import ( # noqa: E402
INTERMEDIATE_DIRS,
MODEL_PATHS,
MODEL_PATHS_METRIC,
SOCKETIO_PING_TIMEOUT,
SOCKETIO_PING_INTERVAL,
SOCKETIO_SLEEP_YIELD,
INITIAL_PROCESSING_DELAY,
BYTES_TO_GB_DIVISOR,
FPS_ROUND_DIGITS,
DURATION_ROUND_DIGITS,
ASPECT_RATIO_ROUND_DIGITS,
ASPECT_RATIO_SBS_THRESHOLD,
ASPECT_RATIO_OU_THRESHOLD,
SESSION_ID_DISPLAY_LENGTH,
PROGRESS_UPDATE_INTERVAL,
PROGRESS_DECIMAL_PLACES,
PROGRESS_STEP_WEIGHTS,
PREVIEW_UPDATE_INTERVAL,
PREVIEW_DOWNSCALE_WIDTH,
FFMPEG_OVERWRITE_FLAG,
FFMPEG_CRF_HIGH_QUALITY,
FFMPEG_CRF_MEDIUM_QUALITY,
FFMPEG_CRF_FAST_QUALITY,
FFMPEG_DEFAULT_PRESET,
FFMPEG_PIX_FORMAT,
VIDEO_CREATION_TIMEOUT,
DEFAULT_FALLBACK_FPS,
DEFAULT_SERVER_PORT,
DEFAULT_SERVER_HOST,
SIGNAL_SHUTDOWN_TIMEOUT,
)
from src.depth_surge_3d.utils.system.console import warning as console_warning # noqa: E402
from flask import Flask, render_template, request, jsonify # noqa: E402
from flask_socketio import SocketIO # noqa: E402
# NOTE: torch is imported later (line ~960) to avoid CUDA initialization issues
# Add src to path for package imports
sys.path.insert(0, str(Path(__file__).parent / "src"))
from depth_surge_3d.rendering import create_stereo_projector # noqa: E402
from depth_surge_3d.processing import VideoProcessor # noqa: E402
# Global flags and state
VERBOSE = False  # presumably toggled by a --verbose CLI flag; gates vprint() output — confirm against arg parsing
SHUTDOWN_FLAG = False  # set by cleanup_processes() to signal app-wide shutdown
ACTIVE_PROCESSES = set()  # handles with .terminate()/.kill(), torn down in cleanup_processes()
def vprint(*args: Any, **kwargs: Any) -> None:
    """Forward all arguments to print(), but only when verbose mode is on."""
    if not VERBOSE:
        return
    print(*args, **kwargs)
def _get_version_info() -> tuple[str, str]:
"""Get version and git commit ID"""
try:
from depth_surge_3d import __version__
version = __version__
except ImportError:
version = "dev"
try:
git_commit = subprocess.run(
["git", "rev-parse", "--short", "HEAD"],
capture_output=True,
text=True,
check=False,
timeout=1,
).stdout.strip()
if not git_commit:
git_commit = "unknown"
except Exception:
git_commit = "unknown"
return version, git_commit
def _rgb_to_ansi256(r: int, g: int, b: int) -> int:
"""Convert RGB (0-5 range) to ANSI 256 color code"""
return 16 + 36 * r + 6 * g + b
def _get_lime_color(position: float) -> str:
    """Return an ANSI escape interpolated from dark green to brand lime.

    Brand lime is CSS #39ff14, approximated as (1, 5, 0) in the ANSI
    0-5 color cube; the gradient starts at a dark green (0, 3, 0).
    """
    start = (0, 3, 0)  # dark green
    end = (1, 5, 0)  # brand lime (#39ff14)
    components = []
    for lo, hi in zip(start, end):
        # Lerp each channel, round for smoothness, clamp into the cube.
        value = round(lo + (hi - lo) * position)
        components.append(max(0, min(5, value)))
    color_code = _rgb_to_ansi256(*components)
    return f"\033[38;5;{color_code}m"
def _print_banner_border(border_width: int, char: str, row_offset: int, total_rows: int) -> None:
    """Print one horizontal border row colored along the banner's diagonal gradient."""
    margin = 0.0  # no overshoot: gradient spans exactly the full banner
    max_diagonal = total_rows + border_width
    pieces = [" ", _get_lime_color(0.0 - margin), "█"]  # leading space + left cap
    for col in range(border_width):
        # Diagonal position advances with both the row offset and the column.
        raw_pos = (row_offset + col) / max_diagonal
        stretched = (raw_pos * (1 + 2 * margin)) - margin
        pieces.append(f"{_get_lime_color(stretched)}{char}")
    pieces.append(_get_lime_color(1.0 + margin) + "█\033[0m")  # right cap + reset
    print("".join(pieces))
def _print_banner_line(line: str, row_idx: int, num_rows: int, max_diagonal: int) -> None:
    """Print one banner text row, coloring each glyph along the diagonal."""
    reset = "\033[0m"
    margin = 0.0  # no overshoot: gradient spans exactly the full banner
    # Left border block colored by row position, with a leading space.
    parts = [f" {_get_lime_color(row_idx / (num_rows + 1))}█{reset} "]
    # Diagonal gradient over the text: position grows with row + column.
    for col_idx, glyph in enumerate(line):
        raw_pos = (row_idx + col_idx) / max_diagonal
        stretched = (raw_pos * (1 + 2 * margin)) - margin
        parts.append(f"{_get_lime_color(stretched)}{glyph}")
    # Right border block, one row-step further along the gradient.
    parts.append(f"{reset} {_get_lime_color((row_idx + 1) / (num_rows + 1))}█{reset}")
    print("".join(parts))
def print_banner() -> None:
    """Print the Depth Surge 3D startup banner with a diagonal lime gradient."""
    version, git_commit = _get_version_info()
    blue_accent = "\033[38;5;39m"
    bright_blue = "\033[38;5;51m"
    reset = "\033[0m"
    banner_lines = [
        "░█▀▄░█▀▀░█▀█░▀█▀░█░█░░░█▀▀░█░█░█▀▄░█▀▀░█▀▀░░░▀▀█░█▀▄░",
        "░█░█░█▀▀░█▀▀░░█░░█▀█░░░▀▀█░█░█░█▀▄░█░█░█▀▀░░░░▀▄░█░█░",
        "░▀▀░░▀▀▀░▀░░░░▀░░▀░▀░░░▀▀▀░▀▀▀░▀░▀░▀▀▀░▀▀▀░░░▀▀░░▀▀░░",
    ]
    print()
    # Gradient geometry: borders add one row above and below the text.
    line_length = len(banner_lines[0])
    border_width = line_length + 2
    num_rows = len(banner_lines)
    max_diagonal = num_rows + line_length - 2
    total_rows = num_rows + 2  # +2 for top and bottom borders
    # Top border, then text rows (offset by one for the border), then bottom.
    _print_banner_border(border_width, "▀", row_offset=0, total_rows=total_rows)
    for row_idx, line in enumerate(banner_lines, start=1):
        _print_banner_line(line, row_idx, num_rows, max_diagonal)
    _print_banner_border(border_width, "▄", row_offset=num_rows + 1, total_rows=total_rows)
    # Centered repo link (leading space keeps alignment with the banner).
    repo_link = "https://github.com/Tok/depth-surge-3d"
    link_padding = (border_width - len(repo_link)) // 2 + 1
    print(f"{' ' * link_padding}{blue_accent}{repo_link}{reset}")
    # Centered version and commit info, padded from the uncolored text width.
    version_info = f"v{version} [{git_commit}]"
    version_padding = (border_width - len(version_info)) // 2 + 1
    print(
        f"{' ' * version_padding}v{bright_blue}{version}{reset} [{bright_blue}{git_commit}{reset}]"
    )
    print()
def cleanup_processes() -> None:
    """Terminate tracked workers and stray ffmpeg jobs, setting the shutdown flag."""
    global SHUTDOWN_FLAG
    SHUTDOWN_FLAG = True
    vprint("Cleaning up active processes...")
    # Best-effort kill of any ffmpeg processes spawned for this app.
    try:
        subprocess.run(["pkill", "-f", "ffmpeg.*depth-surge"], check=False, capture_output=True)
    except Exception:
        pass
    # Tear down tracked handles; prefer terminate(), fall back to kill().
    for proc in list(ACTIVE_PROCESSES):
        try:
            stopper = getattr(proc, "terminate", None) or getattr(proc, "kill", None)
            if stopper is not None:
                stopper()
        except Exception:
            pass
    ACTIVE_PROCESSES.clear()
    vprint("Process cleanup completed")
def signal_handler(signum: int, frame: Any) -> None:
    """Handle shutdown signals: stop active processing, clean up, and exit.

    Args:
        signum: Signal number received (e.g. SIGINT, SIGTERM).
        frame: Current stack frame (unused; required by the signal API).
    """
    print(f"\nReceived signal {signum}, shutting down gracefully...")
    # Stop any active processing
    if current_processing["active"]:
        print(" Stopping active video processing...")
        # Cooperative cancellation: the worker polls this flag.
        current_processing["stop_requested"] = True
        # Wait a moment for cleanup
        if current_processing["thread"] and current_processing["thread"].is_alive():
            current_processing["thread"].join(timeout=SIGNAL_SHUTDOWN_TIMEOUT)
    # Clean up all processes
    cleanup_processes()
    sys.exit(0)
# Flask application and Socket.IO server setup.
app = Flask(__name__)
# NOTE(review): hard-coded secret key — acceptable for a local tool, but
# confirm this app is never exposed publicly with session-based features.
app.config["SECRET_KEY"] = "depth-surge-3d-secret"
app.config["OUTPUT_FOLDER"] = str(Path("output").resolve())
# Use threading async_mode and disable ping timeout for long-running tasks
socketio = SocketIO(
    app,
    cors_allowed_origins="*",  # allow browser clients from any origin
    logger=False,
    engineio_logger=False,
    async_mode="threading",  # background tasks run on plain threads, not eventlet/gevent
    ping_timeout=SOCKETIO_PING_TIMEOUT,
    ping_interval=SOCKETIO_PING_INTERVAL,
)
@app.teardown_appcontext
def cleanup_on_teardown(error: Exception | None) -> None:
    """Clean up processes when a Flask app context tears down.

    NOTE(review): teardown_appcontext fires after every request context,
    not only at shutdown — cleanup_processes() sets the global shutdown
    flag each time it runs; confirm this is the intended behavior.
    """
    if error:
        vprint(f"App teardown due to error: {error}")
    cleanup_processes()
# Global variables for processing state
# Shared mutable state for the single active processing job; read by HTTP
# and socket handlers, mutated from the background worker thread.
current_processing = {
    "active": False,  # True while a job is running
    "progress": 0,  # overall weighted progress, 0-100
    "stage": "",  # human-readable description of current work
    "total_frames": 0,
    "current_frame": 0,
    "session_id": None,  # Socket.IO room of the owning client
    "stop_requested": False,  # cooperative cancellation flag
    "thread": None,  # worker thread handle, joined on shutdown
}
def ensure_directories() -> None:
    """Create the configured output directory if it does not exist yet."""
    output_dir = Path(app.config["OUTPUT_FOLDER"])
    output_dir.mkdir(exist_ok=True)
def get_video_info(video_path: str | Path) -> dict[str, Any] | None:
    """Probe a video file with OpenCV and return its basic properties.

    Returns None when the file cannot be opened. Division-by-zero from
    broken containers (fps or height of 0) is guarded with fallbacks.
    """
    capture = cv2.VideoCapture(str(video_path))
    try:
        if not capture.isOpened():
            return None
        fps = capture.get(cv2.CAP_PROP_FPS)
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = frame_count / fps if fps > 0 else 0
        aspect_ratio = width / height if height > 0 else 1.0
        return {
            "width": width,
            "height": height,
            "fps": round(fps, FPS_ROUND_DIGITS),
            "duration": round(duration, DURATION_ROUND_DIGITS),
            "frame_count": frame_count,
            "aspect_ratio": round(aspect_ratio, ASPECT_RATIO_ROUND_DIGITS),
            "aspect_ratio_text": f"{width}:{height}",
        }
    finally:
        # Always release the capture handle, even on early return.
        capture.release()
def find_source_video(directory: Path) -> Path | None:
"""
Find the source video file in an output directory.
Looks for video files excluding processed outputs (those with _3D_ in name).
Args:
directory: Directory to search
Returns:
Path to source video, or None if not found
"""
video_extensions = [".mp4", ".avi", ".mov", ".mkv", ".wmv", ".flv", ".webm"]
for ext in video_extensions:
for video_file in directory.glob(f"*{ext}"):
# Skip processed output files (they contain _3D_)
if "_3D_" not in video_file.name:
return video_file
return None
def get_system_info() -> dict[str, Any]:
    """Report processing device details (GPU name, VRAM usage) for the UI."""
    import torch  # Import here to avoid early CUDA initialization

    # CPU-only defaults; overwritten below when CUDA is usable.
    info: dict[str, Any] = {
        "gpu_device": "CPU",
        "vram_usage": "N/A",
        "device_mode": "CPU",
        "cuda_available": False,
    }
    try:
        if not torch.cuda.is_available():
            return info
        info["cuda_available"] = True
        info["gpu_device"] = torch.cuda.get_device_name(0)
        info["device_mode"] = "GPU"
        try:
            allocated_gb = torch.cuda.memory_allocated() / BYTES_TO_GB_DIVISOR
            total_gb = torch.cuda.get_device_properties(0).total_memory / BYTES_TO_GB_DIVISOR
            info["vram_usage"] = f"{allocated_gb:.1f}GB / {total_gb:.1f}GB"
        except Exception as vram_error:
            vprint(f"Error getting VRAM details: {vram_error}")
            info["vram_usage"] = "N/A"
    except Exception as e:
        vprint(f"Error getting GPU info: {e}")
    return info
class ProgressCallback:
    """Enhanced callback class to track processing progress for both serial and batch modes.

    Translates per-step progress reports from the video pipeline into
    weighted overall progress, ETA estimates, console output, and
    Socket.IO events ("progress_update", "frame_preview",
    "processing_complete") emitted to one client session room.
    """

    def __init__(
        self,
        session_id: str,
        total_frames: int,
        processing_mode: str = "serial",
        enable_live_preview: bool = True,
        preview_update_interval: float = PREVIEW_UPDATE_INTERVAL,
    ) -> None:
        self.session_id = session_id  # Socket.IO room to emit events to
        self.total_frames = total_frames
        self.processing_mode = processing_mode
        self.current_frame = 0
        self.last_update_time = 0  # throttles progress_update emissions
        self.current_phase = "extraction"  # extraction, processing, video
        self.step_start_times = {}  # Track start time for each step
        self.current_step_name = None
        self.start_time = time.time()  # For ETA calculation
        self.step_frame_times = []  # Track recent frame times for current step
        self.step_start_progress = 0  # Track progress when step started
        # Preview tracking
        self.enable_live_preview = enable_live_preview
        self.last_preview_time = 0  # throttles frame_preview emissions
        self.preview_interval = preview_update_interval
        self.preview_downscale_width = PREVIEW_DOWNSCALE_WIDTH
        # Step tracking (used for all modes)
        # Note: These must match the step names sent from video_processor.py
        self.steps = [
            "Frame Extraction",  # Step 1: FFmpeg extracts frames
            "Depth Map Generation",  # Step 2: AI generates depth maps
            "Stereo Pair Creation",  # Step 3: Create L/R stereo pairs (includes frame loading)
            "Fisheye Distortion",  # Step 4: Apply distortion (optional)
            "Crop Frames",  # Step 5: Crop frames for VR
            "AI Upscaling",  # Step 6: AI upscaling (optional)
            "VR Assembly",  # Step 7: Assemble final VR frames
            "Video Creation",  # Step 8: FFmpeg creates video
        ]
        # Weighted progress based on actual timing measurements
        # [2%, 35%, 20%, 8%, 2%, 18%, 8%, 7%] = 100%
        self.step_weights = PROGRESS_STEP_WEIGHTS
        self.current_step_index = 0
        self.step_progress = 0  # units completed within the current step
        self.step_total = 0  # total units expected for the current step

    def send_preview_frame(
        self,
        frame_path: Path,
        frame_type: str,
        frame_number: int,
    ) -> None:
        """
        Send preview frame via websocket.

        Reads the frame from disk, downscales it to the configured preview
        width, base64-encodes it as PNG, and emits a "frame_preview" event.
        Throttled by preview_interval; all errors are logged, never raised.

        Args:
            frame_path: Path to the frame image file
            frame_type: Type of frame ("depth_map", "stereo_left", "stereo_right", "vr_frame")
            frame_number: Frame number being processed
        """
        # Check if preview is enabled
        if not self.enable_live_preview:
            return
        current_time = time.time()
        # Throttle preview updates
        if current_time - self.last_preview_time < self.preview_interval:
            return
        try:
            # Read frame
            frame = cv2.imread(str(frame_path))
            if frame is None:
                vprint(f"Preview: Failed to read frame {frame_path}")
                return
            # Validate dimensions
            height, width = frame.shape[:2]
            if width <= 0 or height <= 0:
                vprint(f"Preview: Invalid dimensions {width}x{height} for {frame_path}")
                return
            # Cap input size to prevent excessive processing
            MAX_INPUT_DIM = 8192  # Reject frames larger than 8K
            if width > MAX_INPUT_DIM or height > MAX_INPUT_DIM:
                vprint(
                    f"Preview: Frame too large ({width}x{height}), "
                    f"exceeds max {MAX_INPUT_DIM}px"
                )
                return
            # Downscale for transmission
            scale = self.preview_downscale_width / width
            new_width = self.preview_downscale_width
            new_height = int(height * scale)
            frame_small = cv2.resize(frame, (new_width, new_height))
            # Encode to base64
            _, buffer = cv2.imencode(".png", frame_small)
            img_base64 = base64.b64encode(buffer).decode("utf-8")
            # Cap base64 payload size
            MAX_PAYLOAD_KB = 500
            payload_size_kb = len(img_base64) / 1024
            if payload_size_kb > MAX_PAYLOAD_KB:
                vprint(
                    f"Preview: Payload too large ({payload_size_kb:.1f}KB), "
                    f"skipping {frame_type} frame {frame_number}"
                )
                return
            # Send via socketio
            preview_data = {
                "frame_type": frame_type,
                "frame_number": frame_number,
                "image_data": f"data:image/png;base64,{img_base64}",
                "dimensions": {"width": new_width, "height": new_height},
            }
            socketio.emit("frame_preview", preview_data, room=self.session_id)
            self.last_preview_time = current_time
        except Exception as e:
            # Log error but don't interrupt processing
            vprint(
                f"Preview: Error sending {frame_type} frame {frame_number}: "
                f"{type(e).__name__}: {e}"
            )

    def send_preview_frame_from_array(
        self,
        frame_array: np.ndarray,
        frame_type: str,
        frame_number: int,
    ) -> None:
        """
        Send preview frame directly from numpy array (no disk I/O).

        Same encoding/throttling behavior as send_preview_frame, but skips
        the disk read; used by processors that hold frames in memory.

        Args:
            frame_array: Frame as numpy array (BGR format)
            frame_type: Type of frame ("depth_map", "stereo_left", "upscaled_left", etc)
            frame_number: Frame number being processed
        """
        # Check if preview is enabled
        if not self.enable_live_preview:
            return
        current_time = time.time()
        # Throttle preview updates
        if current_time - self.last_preview_time < self.preview_interval:
            return
        try:
            # Validate dimensions
            height, width = frame_array.shape[:2]
            if width <= 0 or height <= 0:
                vprint(f"Preview: Invalid dimensions {width}x{height}")
                return
            # Cap input size
            MAX_INPUT_DIM = 8192
            if width > MAX_INPUT_DIM or height > MAX_INPUT_DIM:
                vprint(
                    f"Preview: Frame too large ({width}x{height}), "
                    f"exceeds max {MAX_INPUT_DIM}px"
                )
                return
            # Downscale for transmission
            scale = self.preview_downscale_width / width
            new_width = self.preview_downscale_width
            new_height = int(height * scale)
            frame_small = cv2.resize(frame_array, (new_width, new_height))
            # Encode to base64
            _, buffer = cv2.imencode(".png", frame_small)
            img_base64 = base64.b64encode(buffer).decode("utf-8")
            # Cap base64 payload size
            MAX_PAYLOAD_KB = 500
            payload_size_kb = len(img_base64) / 1024
            if payload_size_kb > MAX_PAYLOAD_KB:
                vprint(
                    f"Preview: Payload too large ({payload_size_kb:.1f}KB), "
                    f"skipping {frame_type} frame {frame_number}"
                )
                return
            # Send via socketio
            preview_data = {
                "frame_type": frame_type,
                "frame_number": frame_number,
                "image_data": f"data:image/png;base64,{img_base64}",
                "dimensions": {"width": new_width, "height": new_height},
            }
            socketio.emit("frame_preview", preview_data, room=self.session_id)
            self.last_preview_time = current_time
        except Exception as e:
            # Log error but don't interrupt processing
            vprint(
                f"Preview: Error sending {frame_type} frame {frame_number}: "
                f"{type(e).__name__}: {e}"
            )

    def _calculate_eta(self, current_progress: float) -> str | None:
        """
        Calculate estimated time remaining using adaptive per-step timing.

        This uses actual measured time-per-frame for the current step and
        weights for remaining steps, providing much more accurate ETAs
        especially for slow steps like ESRGAN upscaling.

        Args:
            current_progress: Current progress percentage (0-100)

        Returns:
            Formatted ETA string (e.g., "5m 23s") or None if not enough data
        """
        if current_progress <= 0:
            return None
        current_time = time.time()
        elapsed = current_time - self.start_time
        # Need at least 3 seconds of data for reasonable estimate
        if elapsed < 3:
            return None
        # Strategy: Estimate remaining time for current step + remaining steps
        remaining_time = 0
        # 1. Estimate remaining time for current step
        if self.step_total > 0 and self.step_progress > 0:
            # Use actual measured rate for current step
            step_start_time = self.step_start_times.get(self.current_step_name, self.start_time)
            step_elapsed = current_time - step_start_time
            # Need at least 2 seconds into current step for accurate measurement
            if step_elapsed >= 2:
                frames_completed = self.step_progress
                frames_remaining = self.step_total - self.step_progress
                # Calculate time per frame for this step
                time_per_frame = step_elapsed / max(frames_completed, 1)
                # Estimate remaining time for this step
                step_remaining_time = time_per_frame * frames_remaining
                remaining_time += step_remaining_time
        # 2. Estimate time for remaining steps using weight-based projection
        # Use the overall rate as a fallback for future steps
        if self.current_step_index < len(self.steps) - 1:
            # Estimate time based on overall average rate (fallback for steps we haven't measured)
            if current_progress > 5:  # Need some progress to estimate
                avg_time_per_percent = elapsed / current_progress
                remaining_percent = 100 - current_progress
                fallback_estimate = avg_time_per_percent * remaining_percent
                # Weight between step-based estimate and fallback
                # Use step-based more heavily when we have good data
                if remaining_time > 0:
                    # We have a step-based estimate, use it primarily
                    remaining_time = remaining_time * 0.8 + fallback_estimate * 0.2
                else:
                    # Fall back to overall rate
                    remaining_time = fallback_estimate
        # Fallback: use simple overall progress rate if we don't have step data
        if remaining_time <= 0 and current_progress > 0:
            progress_ratio = current_progress / 100.0
            estimated_total_time = elapsed / progress_ratio
            remaining_time = estimated_total_time - elapsed
        if remaining_time < 0:
            return None
        return self._format_time(remaining_time)

    def _format_time(self, seconds: float) -> str:
        """Format seconds as human-readable time (e.g., '5m 23s' or '2h 15m')."""
        if seconds < 60:
            return f"{int(seconds)}s"
        elif seconds < 3600:  # Less than 1 hour
            minutes = int(seconds // 60)
            secs = int(seconds % 60)
            return f"{minutes}m {secs}s"
        else:  # 1 hour or more
            hours = int(seconds // 3600)
            minutes = int((seconds % 3600) // 60)
            return f"{hours}h {minutes}m"

    def update_progress(  # noqa: C901
        self,
        stage: str,
        frame_num: int | None = None,
        phase: str | None = None,
        step_name: str | None = None,
        step_progress: int | None = None,
        step_total: int | None = None,
    ) -> None:
        """Record a progress report from the pipeline and emit it to the client.

        Combines per-step progress into a weighted overall percentage,
        updates the shared current_processing state, prints a console line,
        and emits a "progress_update" Socket.IO event. Raises
        InterruptedError when the user has requested a stop.
        """
        import time  # redundant (time is imported at module level); kept as-is

        # Check if stop has been requested
        if current_processing.get("stop_requested", False):
            raise InterruptedError("Processing stopped by user request")
        # Throttle updates to avoid threading issues
        current_time = time.time()
        if current_time - self.last_update_time < PROGRESS_UPDATE_INTERVAL:
            return
        self.last_update_time = current_time
        # Update phase if provided
        if phase:
            self.current_phase = phase
        # Track step changes and timing
        if step_name and step_name != self.current_step_name:
            # New step started
            self.current_step_name = step_name
            self.step_start_times[step_name] = current_time
            self.step_frame_times = []  # Reset frame times for new step
            self.step_start_progress = 0  # Reset progress for new step
            # Update step index
            if step_name in self.steps:
                self.current_step_index = self.steps.index(step_name)
        # Update step progress
        if step_progress is not None:
            self.step_progress = step_progress
        if step_total is not None:
            self.step_total = step_total
        if frame_num is not None:
            self.current_frame = frame_num
            current_processing["current_frame"] = frame_num
        current_processing["stage"] = stage
        # Calculate overall progress using weighted steps
        step_progress_ratio = (
            (self.step_progress / max(self.step_total, 1)) if self.step_total > 0 else 0
        )
        # Sum weights of all completed steps
        cumulative_weight = sum(self.step_weights[: self.current_step_index])
        # Add weighted progress of current step
        if self.current_step_index < len(self.step_weights):
            cumulative_weight += step_progress_ratio * self.step_weights[self.current_step_index]
        progress = round(cumulative_weight * 100, PROGRESS_DECIMAL_PLACES)
        current_processing["progress"] = round(progress, PROGRESS_DECIMAL_PLACES)
        current_processing["phase"] = self.current_phase
        current_processing["processing_mode"] = self.processing_mode
        current_processing["step_name"] = step_name or self.current_step_name
        current_processing["step_progress"] = self.step_progress
        current_processing["step_total"] = self.step_total
        current_processing["step_index"] = self.current_step_index
        # Calculate ETA
        eta_str = self._calculate_eta(progress)
        # Emit progress update (always include step data for UI)
        progress_data = {
            "progress": current_processing["progress"],
            "stage": stage,
            "current_frame": self.current_frame,
            "total_frames": self.total_frames,
            "phase": self.current_phase,
            "processing_mode": self.processing_mode,
            "step_name": self.current_step_name or "",
            "step_progress": self.step_progress,
            "step_total": self.step_total,
            "step_index": self.current_step_index,
            "total_steps": len(self.steps),
            "eta": eta_str,  # Add ETA
        }
        # Console output - show both overall and step progress
        step_percent = (
            (self.step_progress / max(self.step_total, 1)) * 100 if self.step_total > 0 else 0
        )
        eta_suffix = f" | ETA: {eta_str}" if eta_str else ""
        progress_msg = (
            f"Overall: {progress:05.1f}% | "
            f"Step: {step_percent:03.0f}% ({self.step_progress:04d}/{self.step_total:04d}) | "
            f"{stage}{eta_suffix}"
        )
        print(progress_msg)
        try:
            # Emit progress (socketio.start_background_task handles context automatically)
            socketio.emit("progress_update", progress_data, room=self.session_id)
            # Yield control to allow message to be sent immediately (fixes buffering issue)
            socketio.sleep(SOCKETIO_SLEEP_YIELD)
        except Exception as e:
            print(console_warning(f"Error emitting progress: {e}"))
            import traceback

            traceback.print_exc()

    def get_step_duration(self) -> float:
        """Get duration of current step in seconds."""
        import time  # redundant (time is imported at module level); kept as-is

        if self.current_step_name and self.current_step_name in self.step_start_times:
            return time.time() - self.step_start_times[self.current_step_name]
        return 0

    def finish(self, message: str = "Processing complete") -> None:
        """Finish progress tracking (compatibility with ProgressTracker interface)."""
        print(f"{message}")
        try:
            socketio.emit(
                "processing_complete",
                {"success": True, "message": message},
                room=self.session_id,
            )
            # Allow time for message to be sent
            socketio.sleep(SOCKETIO_SLEEP_YIELD)
        except Exception as e:
            print(console_warning(f"Error emitting completion: {e}"))
def process_video_async( # noqa: C901
session_id: str, video_path: str | Path, settings: dict[str, Any], output_dir: str | Path
) -> None:
"""Process video in background thread"""
import torch # Import here to avoid CUDA initialization issues in main thread
try:
current_processing["active"] = True
current_processing["session_id"] = session_id
current_processing["stop_requested"] = False
# Check CUDA availability in this thread
cuda_available = torch.cuda.is_available()
if cuda_available:
print(f"CUDA detected: {torch.cuda.get_device_name(0)}")
else:
print("CUDA not available, using CPU")
# Initialize projector with depth model
# Get depth model version (v2 or v3)
depth_model_version = settings.get("depth_model_version", "v3") # Default to V3
model_size = settings.get("model_size", "vitb") # Default to Base for 16GB GPUs
use_metric = settings.get("use_metric_depth", True) # Default to metric depth
device = settings.get("device", "auto")
if device == "auto":
device = "cuda" if cuda_available else "cpu"
# Fail fast if GPU requested but not available
if device == "cuda" and not cuda_available:
error_msg = (
"GPU (CUDA) requested but not available. "
"Please select 'Auto' or 'Force CPU' in Processing Device settings."
)
raise Exception(error_msg)
# For V2, select the appropriate model path based on size and metric/relative
# For V3, model_path is just the model name (e.g., "large", "base", "small")
if depth_model_version == "v2":
if use_metric:
model_paths_dict = MODEL_PATHS_METRIC
depth_type = "Metric"
else:
model_paths_dict = MODEL_PATHS
depth_type = "Relative"
model_path = settings.get(
"model_path",
model_paths_dict.get(model_size, MODEL_PATHS_METRIC["vitb"]),
)
print(
f"Loading Video-Depth-Anything V2: {model_size.upper()} {depth_type} from: {model_path}"
)
else:
# For V3, map model_size to DA3 model names
da3_model_map = {"vits": "small", "vitb": "base", "vitl": "large"}
model_path = da3_model_map.get(model_size, "large")
print(f"Loading Depth-Anything V3: {model_path.upper()} model (metric: {use_metric})")
print(f"Using device: {device.upper()}")
projector = create_stereo_projector(
model_path,
device,
metric=use_metric,
depth_model_version=depth_model_version,
)
# Ensure the model is loaded before processing
if not projector.depth_estimator.load_model():
raise Exception("Failed to load depth estimation model")
# Get video info for progress tracking
video_info = get_video_info(video_path)
if not video_info:
raise Exception("Could not read video file")
# Calculate expected frame count based on time range and ORIGINAL fps (since we extract at original fps)
start_time = settings.get("start_time")
end_time = settings.get("end_time")
# Always use original FPS for frame count calculation (interpolation happens at the end)
original_fps = video_info["fps"]
# Calculate actual frame range that will be extracted (matching VideoProcessor logic)
from depth_surge_3d.utils.path_utils import calculate_frame_range
start_frame, end_frame = calculate_frame_range(
video_info["frame_count"], original_fps, start_time, end_time
)
expected_frames = end_frame - start_frame
processing_mode = settings.get("processing_mode", "serial")
enable_live_preview = settings.get("enable_live_preview", True)
preview_update_interval = settings.get("preview_update_interval", PREVIEW_UPDATE_INTERVAL)
callback = ProgressCallback(
session_id,
expected_frames,
processing_mode,
enable_live_preview,
preview_update_interval,
)
# Give client time to join the session room before starting processing
socketio.sleep(INITIAL_PROCESSING_DELAY)
# Use the appropriate processor based on processing mode
if processing_mode == "batch":
from depth_surge_3d.processing.batch_processor import BatchProcessor
processor = BatchProcessor(projector.depth_estimator)
else:
processor = VideoProcessor(projector.depth_estimator)
# Calculate resolution settings that VideoProcessor expects
from depth_surge_3d.utils.domain.resolution import (
get_resolution_dimensions,
calculate_vr_output_dimensions,
auto_detect_resolution,
)
# Resolve VR resolution if auto
vr_resolution = settings.get("vr_resolution", "auto")
if vr_resolution == "auto":
vr_resolution = auto_detect_resolution(
video_info["width"],
video_info["height"],
settings.get("vr_format", "side_by_side"),
)
# Get resolution dimensions
per_eye_width, per_eye_height = get_resolution_dimensions(vr_resolution)
vr_output_width, vr_output_height = calculate_vr_output_dimensions(
per_eye_width, per_eye_height, settings.get("vr_format", "side_by_side")
)
# Add calculated dimensions to settings
settings.update(
{
"per_eye_width": per_eye_width,
"per_eye_height": per_eye_height,
"vr_output_width": vr_output_width,
"vr_output_height": vr_output_height,
"source_width": video_info["width"],
"source_height": video_info["height"],
"source_fps": video_info["fps"],
}
)
success = processor.process(
video_path=video_path,
output_dir=output_dir,
video_properties=video_info,
settings=settings,
progress_callback=callback,
)
if not success:
raise Exception("Video processing failed")
# Processing complete
try:
socketio.emit(
"processing_complete",
{
"success": True,
"output_dir": str(output_dir),
"message": "Video processing completed successfully!",
},
room=session_id,