Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ M3U8_SUPPORT=False
# Enable Aria2 for downloads (True/False)
ENABLE_ARIA2=False

# Enable Cloudflare bypass for direct downloads (True/False)
BYPASS_CLOUDFLARE=True

# Path to Rclone executable
RCLONE_PATH=

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description = "Default template for PDM package"
authors = [
{name = "Benny", email = "benny.think@gmail.com"},
]
dependencies = ["tgcrypto>=1.2.5", "yt-dlp[curl-cffi,default]==2026.1.31", "APScheduler>=3.11.2", "ffmpeg-python>=0.2.0", "PyMySQL>=1.1.1", "filetype>=1.2.0", "beautifulsoup4>=4.14.3", "fakeredis>=2.33.0", "redis==6.4.0", "requests>=2.32.5", "tqdm==4.67.2", "token-bucket>=0.3.0", "python-dotenv>=1.0.1", "black>=24.10.0", "sqlalchemy>=2.0.36", "psutil==7.2.2", "ffpb>=0.4.1", "kurigram==2.2.18", "cryptography>=46.0.4", "greenlet==3.3.1"]
dependencies = ["tgcrypto>=1.2.5", "yt-dlp[curl-cffi,default]==2026.1.31", "APScheduler>=3.11.2", "ffmpeg-python>=0.2.0", "PyMySQL>=1.1.1", "filetype>=1.2.0", "beautifulsoup4>=4.14.3", "fakeredis>=2.33.0", "redis==6.4.0", "requests>=2.32.5", "tqdm==4.67.2", "token-bucket>=0.3.0", "python-dotenv>=1.0.1", "black>=24.10.0", "sqlalchemy>=2.0.36", "psutil==7.2.2", "ffpb>=0.4.1", "kurigram==2.2.18", "cryptography>=46.0.4", "greenlet==3.3.1", "ai-cloudscraper>=3.8.4"]
requires-python = ">=3.10"
readme = "README.md"
license = {text = "Apache2.0"}
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ psutil>=7.2.2
ffpb>=0.4.1
cryptography>=46.0.4
kurigram==2.2.18
yt-dlp[default,curl-cffi]==2026.1.31
yt-dlp[default,curl-cffi]==2026.1.31
ai-cloudscraper>=3.8.4
1 change: 1 addition & 0 deletions src/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def get_env(name: str, default=None):
AUDIO_FORMAT = get_env("AUDIO_FORMAT", "m4a")
M3U8_SUPPORT = get_env("M3U8_SUPPORT")
ENABLE_ARIA2 = get_env("ENABLE_ARIA2")
BYPASS_CLOUDFLARE = get_env("BYPASS_CLOUDFLARE", True)

RCLONE_PATH = get_env("RCLONE")

Expand Down
6 changes: 4 additions & 2 deletions src/engine/direct.py
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about the aria2 part?
Do you have any suggestions?

Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
import filetype
import requests

from config import ENABLE_ARIA2, TMPFILE_PATH
from config import BYPASS_CLOUDFLARE, ENABLE_ARIA2, TMPFILE_PATH
from engine.base import BaseDownloader
from utils.http_client import get_http_client


class DirectDownload(BaseDownloader):
Expand All @@ -40,7 +41,8 @@ def _setup_formats(self) -> list | None:

def _requests_download(self):
logging.info("Requests download with url %s", self._url)
response = requests.get(self._url, stream=True)
client = get_http_client(bypass_enabled=BYPASS_CLOUDFLARE)
response = client.get(self._url, stream=True)
response.raise_for_status()
file = Path(self._tempdir.name).joinpath(uuid4().hex)
with open(file, "wb") as f:
Expand Down
85 changes: 85 additions & 0 deletions src/test_cloudflare_bypass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env python3
# coding: utf-8

# ytdlbot - test_cloudflare_bypass.py
# Test script for Cloudflare bypass functionality using ai-cloudscraper

import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent))

from utils.http_client import BypassHTTPClient, get_realistic_headers


def test_cloudflare_bypass():
    """Fetch each hard-coded test site through a bypass-enabled client.

    Prints the status code and decoded body length for every site, or the
    error if anything in the fetch/report sequence raises. This is a manual
    smoke check: it needs network access and asserts nothing.
    """
    rule = "=" * 50
    print(rule)
    print("Testing Cloudflare Bypass (ai-cloudscraper)")
    print(rule)

    # URL -> human-readable label; insertion order is the order of the checks.
    targets = {
        "https://nowsecure.nl/": "NowSecure (Cloudflare protected)",
        "https://www.google.com/": "Google (no protection)",
    }

    http = BypassHTTPClient(bypass_enabled=True)

    for address, label in targets.items():
        print(f"\nTesting: {label}")
        print(f"URL: {address}")

        # Reading the body is part of the check, so it stays inside the try.
        try:
            reply = http.get(address, timeout=10)
            print(f"Status: {reply.status_code}")
            print(f"Content length: {len(reply.text)} chars")
            print("Result: OK")
        except Exception as exc:
            print(f"Error: {exc}")
            print("Result: FAILED")

    http.close()
    print("\n" + rule)
    print("Test completed!")
    print(rule)


def test_headers():
    """Print every header returned by get_realistic_headers(), one per line."""
    rule = "=" * 50
    print("\n" + rule)
    print("Testing Realistic Headers")
    print(rule)

    for name, content in get_realistic_headers().items():
        print(f"{name}: {content}")

    print("\nResult: OK")


def test_direct_download_usage():
    """Exercise the shared-client accessor with a single GET request.

    Obtains the client from get_http_client(), prints its type name, then
    requests https://httpbin.org/headers and prints the status or the error.
    Requires network access; asserts nothing.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("Testing DirectDownload Usage Pattern")
    print(rule)

    # Imported here because the module-level import does not include it.
    from utils.http_client import get_http_client

    shared = get_http_client(bypass_enabled=True)

    print("\nTesting single instance pattern:")
    print(f"Client type: {type(shared).__name__}")

    try:
        reply = shared.get("https://httpbin.org/headers", timeout=5)
        print(f"Status: {reply.status_code}")
        print("Result: OK")
    except Exception as exc:
        print(f"Error: {exc}")
        print("Result: FAILED")

    print("\n" + rule)


if __name__ == "__main__":
    # Run the manual checks in a fixed order when executed as a script.
    for check in (test_cloudflare_bypass, test_headers, test_direct_download_usage):
        check()
153 changes: 153 additions & 0 deletions src/utils/http_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#!/usr/bin/env python3
# coding: utf-8

# ytdlbot - http_client.py
# HTTP client with Cloudflare bypass support using ai-cloudscraper

__author__ = "yeshua-aguilar"

import logging
from typing import Optional

import cloudscraper
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


class BypassHTTPClient:
    """HTTP GET client that tries a cloudscraper session before plain requests.

    When ``bypass_enabled`` is true, ``get()`` sends the request through a
    lazily created cloudscraper session. If that attempt raises, or returns a
    403 whose body mentions "cloudflare", the request is sent once more
    through a lazily created ``requests.Session`` configured with retries.
    When ``bypass_enabled`` is false, only the plain session is used.

    The instance can be used as a context manager; leaving the ``with`` block
    calls ``close()``.
    """

    def __init__(self, bypass_enabled: bool = True, timeout: int = 30):
        """Store settings; no network session is created until first use.

        Args:
            bypass_enabled: Route requests through cloudscraper first.
            timeout: Default ``timeout`` passed to every request that does
                not specify its own.
        """
        self._bypass_enabled = bypass_enabled
        self._timeout = timeout
        self._session: Optional["requests.Session"] = None
        self._scraper: Optional["cloudscraper.CloudScraper"] = None

        self._user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
        )

        # Applied to both the scraper and the plain session.
        # NOTE(review): "br" is advertised unconditionally; confirm a brotli
        # decoder is installed in the deployment, otherwise drop it.
        self._headers = {
            "User-Agent": self._user_agent,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate, br",
            "DNT": "1",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
        }

    def __enter__(self) -> "BypassHTTPClient":
        """Return the client itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close all sessions when the ``with`` block ends."""
        self.close()

    def _create_scraper(self) -> "cloudscraper.CloudScraper":
        """Build a cloudscraper session with a Chrome/Windows desktop profile."""
        scraper = cloudscraper.create_scraper(
            browser={
                "browser": "chrome",
                "platform": "windows",
                "desktop": True,
            },
            delay=10,
        )
        scraper.headers.update(self._headers)
        return scraper

    def _create_session(self) -> "requests.Session":
        """Build a plain requests session with headers and a retry policy."""
        session = requests.Session()
        session.headers.update(self._headers)

        # Up to 3 retries with backoff for rate-limit and server-error statuses.
        retry_strategy = Retry(
            total=3,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        session.mount("http://", adapter)
        session.mount("https://", adapter)

        return session

    def get(self, url: str, **kwargs) -> "requests.Response":
        """Make a GET request, going through the bypass path when enabled.

        Args:
            url: Address to fetch.
            **kwargs: Forwarded to the underlying session's ``get``;
                ``timeout`` defaults to the value given at construction.

        Returns:
            The response from whichever session served the request.

        Raises:
            Exception: Whatever the plain session raises; errors from the
                bypass attempt itself are logged and trigger the fallback.
        """
        kwargs.setdefault("timeout", self._timeout)

        if self._bypass_enabled:
            return self._bypass_get(url, **kwargs)
        return self._normal_get(url, **kwargs)

    def _bypass_get(self, url: str, **kwargs) -> "requests.Response":
        """Try the cloudscraper session first, then fall back exactly once."""
        try:
            if self._scraper is None:
                self._scraper = self._create_scraper()

            logging.debug("Attempting Cloudflare bypass for %s", url)
            response = self._scraper.get(url, **kwargs)
            # Reading .text stays inside the try so a body-read failure also
            # triggers the fallback instead of propagating.
            blocked = response.status_code == 403 and "cloudflare" in response.text.lower()
        except Exception as e:
            # Deliberately broad: any bypass failure is best-effort.
            logging.warning("Cloudflare bypass error: %s, falling back to normal request", e)
            return self._normal_get(url, **kwargs)

        if not blocked:
            return response

        # The fallback runs outside the try block so that a failure of the
        # plain request propagates once instead of being caught above and
        # retried under a misleading "bypass error" message.
        logging.warning("Cloudflare bypass failed, trying normal request")
        response.close()  # release the rejected response's connection
        return self._normal_get(url, **kwargs)

    def _normal_get(self, url: str, **kwargs) -> "requests.Response":
        """Make a GET request through the plain session, creating it on first use."""
        if self._session is None:
            self._session = self._create_session()

        return self._session.get(url, **kwargs)

    def close(self):
        """Close whichever sessions exist; they are recreated on the next request."""
        if self._session:
            self._session.close()
            self._session = None
        if self._scraper:
            self._scraper.close()
            self._scraper = None


# One shared client per bypass setting, created on first request for it.
_client_instances: dict[bool, BypassHTTPClient] = {}


def get_http_client(bypass_enabled: bool = True) -> BypassHTTPClient:
    """Get or create the shared HTTP client for the requested bypass setting.

    A separate client is cached for each value of ``bypass_enabled``, so a
    caller asking for a non-bypassing client never receives a bypassing one
    that an earlier caller happened to create (and vice versa).

    Args:
        bypass_enabled: Whether the returned client should attempt the
            Cloudflare bypass; any truthy/falsy value is accepted.

    Returns:
        The shared ``BypassHTTPClient`` for that setting.
    """
    key = bool(bypass_enabled)
    client = _client_instances.get(key)
    if client is None:
        client = BypassHTTPClient(bypass_enabled=key)
        _client_instances[key] = client
    return client


def get_cloudflare_bypass_session() -> cloudscraper.CloudScraper:
    """Create a fresh CloudScraper session for yt-dlp or other libraries.

    Every call builds a new scraper with a Chrome/Windows desktop browser
    profile and ``delay=10``; nothing is cached or shared between calls.
    """
    browser_profile = {
        "browser": "chrome",
        "platform": "windows",
        "desktop": True,
    }
    return cloudscraper.create_scraper(browser=browser_profile, delay=10)


def get_realistic_headers() -> dict:
    """Return a new dict of browser-like request headers for manual use.

    The dict holds a desktop Chrome user agent, the usual Accept* headers and
    the Sec-Fetch-* headers. A fresh dict is built on every call, so callers
    may mutate the result freely.
    """
    chrome_user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    )
    # Pairs are listed in the order the headers should appear in the dict.
    header_pairs = [
        ("User-Agent", chrome_user_agent),
        ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"),
        ("Accept-Language", "en-US,en;q=0.5"),
        ("Accept-Encoding", "gzip, deflate, br"),
        ("DNT", "1"),
        ("Connection", "keep-alive"),
        ("Upgrade-Insecure-Requests", "1"),
        ("Sec-Fetch-Dest", "document"),
        ("Sec-Fetch-Mode", "navigate"),
        ("Sec-Fetch-Site", "none"),
        ("Sec-Fetch-User", "?1"),
    ]
    return dict(header_pairs)