-
-
Notifications
You must be signed in to change notification settings - Fork 3.5k
Expand file tree
/
Copy pathMakefile
More file actions
120 lines (106 loc) · 4.41 KB
/
Makefile
File metadata and controls
120 lines (106 loc) · 4.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# YouTube Design Extractor - Setup and Usage
# ==========================================
PYTHON := python3
PIP := pip3
SCRIPT := tools/yt-design-extractor.py
.PHONY: help install install-ocr install-easyocr deps check run run-full run-ocr run-transcript clean
help:
@echo "YouTube Design Extractor"
@echo "========================"
@echo ""
@echo "Setup (run in order):"
@echo " make install-ocr Install system tools (tesseract + ffmpeg)"
@echo " make install Install Python dependencies"
@echo " make deps Show what's installed"
@echo ""
@echo "Optional:"
@echo " make install-easyocr Install EasyOCR + PyTorch (~2GB, for stylized text)"
@echo ""
@echo "Usage:"
@echo " make run URL=<youtube-url> Basic extraction"
@echo " make run-full URL=<youtube-url> Full extraction (OCR + colors + scene)"
@echo " make run-ocr URL=<youtube-url> With OCR only"
@echo " make run-transcript URL=<youtube-url> Transcript + metadata only"
@echo ""
@echo "Examples:"
@echo " make run URL='https://youtu.be/eVnQFWGDEdY'"
@echo " make run-full URL='https://youtu.be/eVnQFWGDEdY' INTERVAL=15"
@echo ""
@echo "Options (pass as make variables):"
@echo " URL=<url> YouTube video URL (required)"
@echo " INTERVAL=<secs> Frame interval in seconds (default: 30)"
@echo " OUTPUT=<dir> Output directory"
@echo " ENGINE=<engine> OCR engine: tesseract (default) or easyocr"
# Installation targets
install:
$(PIP) install -r tools/requirements.txt
install-ocr:
@echo "Installing Tesseract OCR + ffmpeg..."
@if command -v apt-get >/dev/null 2>&1; then \
sudo apt-get update && sudo apt-get install -y tesseract-ocr ffmpeg; \
elif command -v brew >/dev/null 2>&1; then \
brew install tesseract ffmpeg; \
elif command -v dnf >/dev/null 2>&1; then \
sudo dnf install -y tesseract ffmpeg; \
else \
echo "Please install tesseract-ocr and ffmpeg manually"; \
exit 1; \
fi
install-easyocr:
@echo "Installing PyTorch (CPU) + EasyOCR (~2GB download)..."
$(PIP) install torch torchvision --index-url https://download.pytorch.org/whl/cpu
$(PIP) install easyocr
deps:
@echo "Checking dependencies..."
@echo ""
@echo "System tools:"
@command -v ffmpeg >/dev/null 2>&1 && echo " ✓ ffmpeg" || echo " ✗ ffmpeg (run: make install-ocr)"
@command -v tesseract >/dev/null 2>&1 && echo " ✓ tesseract" || echo " ✗ tesseract (run: make install-ocr)"
@echo ""
@echo "Python packages (required):"
@$(PYTHON) -c "import yt_dlp; print(' ✓ yt-dlp', yt_dlp.version.__version__)" 2>/dev/null || echo " ✗ yt-dlp (run: make install)"
@$(PYTHON) -c "from youtube_transcript_api import YouTubeTranscriptApi; print(' ✓ youtube-transcript-api')" 2>/dev/null || echo " ✗ youtube-transcript-api (run: make install)"
@$(PYTHON) -c "from PIL import Image; print(' ✓ Pillow')" 2>/dev/null || echo " ✗ Pillow (run: make install)"
@$(PYTHON) -c "import pytesseract; print(' ✓ pytesseract')" 2>/dev/null || echo " ✗ pytesseract (run: make install)"
@$(PYTHON) -c "from colorthief import ColorThief; print(' ✓ colorthief')" 2>/dev/null || echo " ✗ colorthief (run: make install)"
@echo ""
@echo "Optional (for stylized text OCR):"
@$(PYTHON) -c "import easyocr; print(' ✓ easyocr')" 2>/dev/null || echo " ○ easyocr (run: make install-easyocr)"
check:
@$(PYTHON) $(SCRIPT) --help >/dev/null && echo "✓ Script is working" || echo "✗ Script failed"
# Run targets
INTERVAL ?= 30
ENGINE ?= tesseract
OUTPUT ?=
run:
ifndef URL
@echo "Error: URL is required"
@echo "Usage: make run URL='https://youtu.be/VIDEO_ID'"
@exit 1
endif
$(PYTHON) $(SCRIPT) "$(URL)" --interval $(INTERVAL) $(if $(OUTPUT),-o $(OUTPUT))
run-full:
ifndef URL
@echo "Error: URL is required"
@echo "Usage: make run-full URL='https://youtu.be/VIDEO_ID'"
@exit 1
endif
$(PYTHON) $(SCRIPT) "$(URL)" --full --interval $(INTERVAL) --ocr-engine $(ENGINE) $(if $(OUTPUT),-o $(OUTPUT))
run-ocr:
ifndef URL
@echo "Error: URL is required"
@echo "Usage: make run-ocr URL='https://youtu.be/VIDEO_ID'"
@exit 1
endif
$(PYTHON) $(SCRIPT) "$(URL)" --ocr --interval $(INTERVAL) --ocr-engine $(ENGINE) $(if $(OUTPUT),-o $(OUTPUT))
run-transcript:
ifndef URL
@echo "Error: URL is required"
@echo "Usage: make run-transcript URL='https://youtu.be/VIDEO_ID'"
@exit 1
endif
$(PYTHON) $(SCRIPT) "$(URL)" --transcript-only $(if $(OUTPUT),-o $(OUTPUT))
# Cleanup
clean:
rm -rf yt-extract-*
@echo "Cleaned up extraction directories"