tgbot-collection · yeshua-aguilar · Feb 22, 2026 · Feb 22, 2026 · Feb 22, 2026 · Feb 22, 2026
diff --git a/.env.example b/.env.example
@@ -36,6 +36,9 @@ M3U8_SUPPORT=False
 # Enable Aria2 for downloads (True/False)
 ENABLE_ARIA2=False
 
+# Enable Cloudflare bypass for direct downloads (True/False)
+BYPASS_CLOUDFLARE=True
+
 # Path to Rclone executable
 RCLONE_PATH=
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,7 @@ description = "Default template for PDM package"
 authors = [
     {name = "Benny", email = "benny.think@gmail.com"},
 ]
-dependencies = ["tgcrypto>=1.2.5", "yt-dlp[curl-cffi,default]==2026.1.31", "APScheduler>=3.11.2", "ffmpeg-python>=0.2.0", "PyMySQL>=1.1.1", "filetype>=1.2.0", "beautifulsoup4>=4.14.3", "fakeredis>=2.33.0", "redis==6.4.0", "requests>=2.32.5", "tqdm==4.67.2", "token-bucket>=0.3.0", "python-dotenv>=1.0.1", "black>=24.10.0", "sqlalchemy>=2.0.36", "psutil==7.2.2", "ffpb>=0.4.1", "kurigram==2.2.18", "cryptography>=46.0.4", "greenlet==3.3.1"]
+dependencies = ["tgcrypto>=1.2.5", "yt-dlp[curl-cffi,default]==2026.1.31", "APScheduler>=3.11.2", "ffmpeg-python>=0.2.0", "PyMySQL>=1.1.1", "filetype>=1.2.0", "beautifulsoup4>=4.14.3", "fakeredis>=2.33.0", "redis==6.4.0", "requests>=2.32.5", "tqdm==4.67.2", "token-bucket>=0.3.0", "python-dotenv>=1.0.1", "black>=24.10.0", "sqlalchemy>=2.0.36", "psutil==7.2.2", "ffpb>=0.4.1", "kurigram==2.2.18", "cryptography>=46.0.4", "greenlet==3.3.1", "ai-cloudscraper>=3.8.4"]
 requires-python = ">=3.10"
 readme = "README.md"
 license = {text = "Apache2.0"}

diff --git a/requirements.txt b/requirements.txt
@@ -16,4 +16,5 @@ psutil>=7.2.2
 ffpb>=0.4.1
 cryptography>=46.0.4
 kurigram==2.2.18
-yt-dlp[default,curl-cffi]==2026.1.31
+yt-dlp[default,curl-cffi]==2026.1.31
+ai-cloudscraper>=3.8.4
diff --git a/src/config/config.py b/src/config/config.py
@@ -39,6 +39,7 @@ def get_env(name: str, default=None):
 AUDIO_FORMAT = get_env("AUDIO_FORMAT", "m4a")
 M3U8_SUPPORT = get_env("M3U8_SUPPORT")
 ENABLE_ARIA2 = get_env("ENABLE_ARIA2")
+BYPASS_CLOUDFLARE = get_env("BYPASS_CLOUDFLARE", True)
 
 RCLONE_PATH = get_env("RCLONE")
 

diff --git a/src/engine/direct.py b/src/engine/direct.py
@@ -15,8 +15,9 @@
 import filetype
 import requests
 
-from config import ENABLE_ARIA2, TMPFILE_PATH
+from config import BYPASS_CLOUDFLARE, ENABLE_ARIA2, TMPFILE_PATH
 from engine.base import BaseDownloader
+from utils.http_client import get_http_client
 
 
 class DirectDownload(BaseDownloader):
@@ -40,7 +41,8 @@ def _setup_formats(self) -> list | None:
 
     def _requests_download(self):
         logging.info("Requests download with url %s", self._url)
-        response = requests.get(self._url, stream=True)
+        client = get_http_client(bypass_enabled=BYPASS_CLOUDFLARE)
+        response = client.get(self._url, stream=True)
         response.raise_for_status()
         file = Path(self._tempdir.name).joinpath(uuid4().hex)
         with open(file, "wb") as f:

diff --git a/src/test_cloudflare_bypass.py b/src/test_cloudflare_bypass.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+# ytdlbot - test_cloudflare_bypass.py
+# Test script for Cloudflare bypass functionality using ai-cloudscraper
+
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+
+from utils.http_client import BypassHTTPClient, get_realistic_headers
+
+
+def test_cloudflare_bypass():
+    """Fetch a Cloudflare-protected and an unprotected site; report OK only for non-error statuses."""
+    print("=" * 50)
+    print("Testing Cloudflare Bypass (ai-cloudscraper)")
+    print("=" * 50)
+
+    test_sites = [
+        ("https://nowsecure.nl/", "NowSecure (Cloudflare protected)"),
+        ("https://www.google.com/", "Google (no protection)"),
+    ]
+    client = BypassHTTPClient(bypass_enabled=True)
+    try:
+        for url, description in test_sites:
+            print(f"\nTesting: {description}")
+            print(f"URL: {url}")
+            try:
+                resp = client.get(url, timeout=10)
+                print(f"Status: {resp.status_code}")
+                print(f"Content length: {len(resp.text)} chars")
+                # A 4xx/5xx answer (e.g. a 403 challenge page) is not a successful fetch.
+                print("Result: OK" if resp.ok else "Result: FAILED")
+            except Exception as e:
+                print(f"Error: {e}")
+                print("Result: FAILED")
+    finally:
+        client.close()  # release both sessions even if the run is interrupted
+    print("\n" + "=" * 50)
+    print("Test completed!")
+    print("=" * 50)
+
+
+def test_headers():
+    """Print each header returned by get_realistic_headers(), one per line."""
+    separator = "=" * 50
+    print("\n" + separator)
+    print("Testing Realistic Headers")
+    print(separator)
+
+    for name, content in get_realistic_headers().items():
+        print(f"{name}: {content}")
+
+    print("\nResult: OK")
+
+
+def test_direct_download_usage():
+    """Fetch https://httpbin.org/headers through the client from get_http_client() and print the outcome."""
+    print("\n" + "=" * 50)
+    print("Testing DirectDownload Usage Pattern")
+    print("=" * 50)
+    # get_http_client is not part of the module-level import, so it is imported here.
+    from utils.http_client import get_http_client
+
+    client = get_http_client(bypass_enabled=True)
+
+    print("\nTesting single instance pattern:")
+    print(f"Client type: {type(client).__name__}")
+
+    try:
+        resp = client.get("https://httpbin.org/headers", timeout=5)
+        print(f"Status: {resp.status_code}")
+        print("Result: OK")
+    except Exception as e:
+        print(f"Error: {e}")
+        print("Result: FAILED")
+    print("\n" + "=" * 50)
+
+
+if __name__ == "__main__":  # when run as a script, execute the three checks in order
+    test_cloudflare_bypass()
+    test_headers()
+    test_direct_download_usage()
diff --git a/src/utils/http_client.py b/src/utils/http_client.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+# ytdlbot - http_client.py
+# HTTP client with Cloudflare bypass support using ai-cloudscraper
+
+__author__ = "yeshua-aguilar"
+
+import logging
+from typing import Optional
+
+import cloudscraper
+import requests
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+
+
+class BypassHTTPClient:
+    """HTTP client that tries cloudscraper for GET requests and falls back to plain requests."""
+
+    def __init__(self, bypass_enabled: bool = True, timeout: int = 30):
+        self._bypass_enabled = bypass_enabled
+        self._timeout = timeout  # default timeout applied by get()
+        self._session: Optional[requests.Session] = None  # created on first plain request
+        self._scraper: Optional[cloudscraper.CloudScraper] = None  # created on first bypass attempt
+
+        self._user_agent = (
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+            "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+        )
+        self._headers = {
+            "User-Agent": self._user_agent,
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.5",
+            "Accept-Encoding": "gzip, deflate, br",
+            "DNT": "1",
+            "Connection": "keep-alive",
+            "Upgrade-Insecure-Requests": "1",
+        }
+
+    def _create_scraper(self) -> cloudscraper.CloudScraper:
+        """Build a cloudscraper session (Chrome/Windows desktop profile) carrying the client headers."""
+        scraper = cloudscraper.create_scraper(
+            browser={
+                "browser": "chrome",
+                "platform": "windows",
+                "desktop": True,
+            },
+            delay=10,
+        )
+        scraper.headers.update(self._headers)
+        return scraper
+
+    def _create_session(self) -> requests.Session:
+        """Build a requests session whose adapters retry (total=3) on 429/500/502/503/504."""
+        session = requests.Session()
+        session.headers.update(self._headers)
+        retry_strategy = Retry(
+            total=3,
+            backoff_factor=1,
+            status_forcelist=[429, 500, 502, 503, 504],
+        )
+        adapter = HTTPAdapter(max_retries=retry_strategy)
+        session.mount("http://", adapter)
+        session.mount("https://", adapter)
+        return session
+
+    def get(self, url: str, **kwargs) -> requests.Response:
+        """Make a GET request, via the bypass when enabled; timeout defaults to the instance value."""
+        kwargs.setdefault("timeout", self._timeout)
+        if self._bypass_enabled:
+            return self._bypass_get(url, **kwargs)
+        return self._normal_get(url, **kwargs)
+
+    def _bypass_get(self, url: str, **kwargs) -> requests.Response:
+        """Try Cloudflare bypass first, fallback to normal request.
+
+        The fallback runs when the scraper raises or returns a 403 whose body
+        mentions Cloudflare. Errors raised by the fallback itself propagate.
+        """
+        try:
+            if self._scraper is None:
+                self._scraper = self._create_scraper()
+            logging.debug("Attempting Cloudflare bypass for %s", url)
+            response = self._scraper.get(url, **kwargs)
+            if response.status_code != 403 or "cloudflare" not in response.text.lower():
+                return response
+            logging.warning("Cloudflare bypass failed, trying normal request")
+            # Release the blocked response's connection (matters with stream=True).
+            response.close()
+        except Exception as e:
+            logging.warning("Cloudflare bypass error: %s, falling back to normal request", e)
+        # Kept outside the try so a failing fallback is raised once, not caught and retried.
+        return self._normal_get(url, **kwargs)
+
+    def _normal_get(self, url: str, **kwargs) -> requests.Response:
+        """Make a normal GET request without bypass, creating the session on first use."""
+        if self._session is None:
+            self._session = self._create_session()
+        return self._session.get(url, **kwargs)
+
+    def close(self):
+        """Close all sessions; they are re-created on the next request."""
+        if self._session:
+            self._session.close()
+            self._session = None
+        if self._scraper:
+            self._scraper.close()
+            self._scraper = None
+
+
+_client_instance: Optional[BypassHTTPClient] = None
+
+
+def get_http_client(bypass_enabled: bool = True) -> BypassHTTPClient:
+    """Get or create the shared HTTP client and apply `bypass_enabled` to it (last caller wins)."""
+    global _client_instance
+    _client_instance = _client_instance or BypassHTTPClient(bypass_enabled=bypass_enabled)
+    _client_instance._bypass_enabled = bypass_enabled  # a cached client must not keep a stale flag
+    return _client_instance
+
+
+def get_cloudflare_bypass_session() -> cloudscraper.CloudScraper:
+    """Create a new CloudScraper session for yt-dlp or other libraries.
+
+    Uses a desktop Chrome-on-Windows browser profile; a fresh session is built on every call.
+    """
+    browser_profile = {
+        "browser": "chrome",
+        "platform": "windows",
+        "desktop": True,
+    }
+    return cloudscraper.create_scraper(browser=browser_profile, delay=10)
+
+
+def get_realistic_headers() -> dict:
+    """Return a new dict of desktop-Chrome-style request headers for manual use."""
+    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+    user_agent += "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+    return {
+        "User-Agent": user_agent,
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+        "Accept-Language": "en-US,en;q=0.5",
+        "Accept-Encoding": "gzip, deflate, br",
+        "DNT": "1",
+        "Connection": "keep-alive",
+        "Upgrade-Insecure-Requests": "1",
+        # Fetch-metadata headers, set as for a user-initiated top-level navigation.
+        "Sec-Fetch-Dest": "document",
+        "Sec-Fetch-Mode": "navigate",
+        "Sec-Fetch-Site": "none",
+        "Sec-Fetch-User": "?1",
+    }