From e3a2bae3faee9b878281d92417f15fc25ac7a6a7 Mon Sep 17 00:00:00 2001 From: yeshuaaguilar Date: Sat, 21 Feb 2026 21:22:21 -0500 Subject: [PATCH 1/5] feat: Adding a bypass to avoid Cloudflare bot blocking for websites --- .env.example | 3 + pyproject.toml | 2 +- requirements.txt | 3 +- src/config/config.py | 1 + src/engine/direct.py | 6 +- src/engine/generic.py | 4 +- src/engine/instagram.py | 7 +- src/utils/http_client.py | 153 +++++++++++++++++++++++++++++++++++++++ 8 files changed, 172 insertions(+), 7 deletions(-) create mode 100644 src/utils/http_client.py diff --git a/.env.example b/.env.example index c46b9245..793b21f9 100644 --- a/.env.example +++ b/.env.example @@ -36,6 +36,9 @@ M3U8_SUPPORT=False # Enable Aria2 for downloads (True/False) ENABLE_ARIA2=False +# Enable Cloudflare bypass for direct downloads (True/False) +BYPASS_CLOUDFLARE=True + # Path to Rclone executable RCLONE_PATH= diff --git a/pyproject.toml b/pyproject.toml index 5aefbb20..dd236a83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "Default template for PDM package" authors = [ {name = "Benny", email = "benny.think@gmail.com"}, ] -dependencies = ["tgcrypto>=1.2.5", "yt-dlp[curl-cffi,default]==2026.1.31", "APScheduler>=3.11.2", "ffmpeg-python>=0.2.0", "PyMySQL>=1.1.1", "filetype>=1.2.0", "beautifulsoup4>=4.14.3", "fakeredis>=2.33.0", "redis==6.4.0", "requests>=2.32.5", "tqdm==4.67.2", "token-bucket>=0.3.0", "python-dotenv>=1.0.1", "black>=24.10.0", "sqlalchemy>=2.0.36", "psutil==7.2.2", "ffpb>=0.4.1", "kurigram==2.2.18", "cryptography>=46.0.4", "greenlet==3.3.1"] +dependencies = ["tgcrypto>=1.2.5", "yt-dlp[curl-cffi,default]==2026.1.31", "APScheduler>=3.11.2", "ffmpeg-python>=0.2.0", "PyMySQL>=1.1.1", "filetype>=1.2.0", "beautifulsoup4>=4.14.3", "fakeredis>=2.33.0", "redis==6.4.0", "requests>=2.32.5", "tqdm==4.67.2", "token-bucket>=0.3.0", "python-dotenv>=1.0.1", "black>=24.10.0", "sqlalchemy>=2.0.36", "psutil==7.2.2", "ffpb>=0.4.1", "kurigram==2.2.18", "cryptography>=46.0.4", "greenlet==3.3.1", "cloudscraper>=1.2.71"] requires-python = ">=3.10" readme = "README.md" license = {text = "Apache2.0"} diff --git a/requirements.txt b/requirements.txt index 54783940..d54dde76 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,5 @@ psutil>=7.2.2 ffpb>=0.4.1 cryptography>=46.0.4 kurigram==2.2.18 -yt-dlp[default,curl-cffi]==2026.1.31 \ No newline at end of file +yt-dlp[default,curl-cffi]==2026.1.31 +cloudscraper>=1.2.71 \ No newline at end of file diff --git a/src/config/config.py b/src/config/config.py index ccf130f9..85abdca4 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -39,6 +39,7 @@ def get_env(name: str, default=None): AUDIO_FORMAT = get_env("AUDIO_FORMAT", "m4a") M3U8_SUPPORT = get_env("M3U8_SUPPORT") ENABLE_ARIA2 = get_env("ENABLE_ARIA2") +BYPASS_CLOUDFLARE = get_env("BYPASS_CLOUDFLARE", True) RCLONE_PATH = get_env("RCLONE") diff --git a/src/engine/direct.py b/src/engine/direct.py index 0388ad45..8f2d6c73 100644 --- a/src/engine/direct.py +++ b/src/engine/direct.py @@ -15,8 +15,9 @@ import filetype import requests -from config import ENABLE_ARIA2, TMPFILE_PATH +from config import BYPASS_CLOUDFLARE, ENABLE_ARIA2, TMPFILE_PATH from engine.base import BaseDownloader +from utils.http_client import get_http_client class DirectDownload(BaseDownloader): @@ -40,7 +41,8 @@ def _setup_formats(self) -> list | None: def _requests_download(self): logging.info("Requests download with url %s", self._url) - response = requests.get(self._url, stream=True) + client = get_http_client(bypass_enabled=BYPASS_CLOUDFLARE) + response = client.get(self._url, stream=True) response.raise_for_status() file = Path(self._tempdir.name).joinpath(uuid4().hex) with open(file, "wb") as f: diff --git a/src/engine/generic.py b/src/engine/generic.py index bbcf3d7b..ab3d0864 100644 --- a/src/engine/generic.py +++ b/src/engine/generic.py @@ -9,10 +9,11 @@ import yt_dlp -from config import AUDIO_FORMAT +from config import AUDIO_FORMAT, BYPASS_CLOUDFLARE from utils import is_youtube from database.model import get_format_settings, get_quality_settings from engine.base import BaseDownloader +from utils.http_client import get_realistic_headers def match_filter(info_dict): @@ -103,6 +104,7 @@ def _download(self, formats) -> list: "embed_metadata": True, "embed_thumbnail": True, "writethumbnail": False, + "http_headers": get_realistic_headers(), } # setup cookies for youtube only if is_youtube(self._url): diff --git a/src/engine/instagram.py b/src/engine/instagram.py index 216c71e0..ae1f8dc8 100644 --- a/src/engine/instagram.py +++ b/src/engine/instagram.py @@ -9,7 +9,9 @@ import filetype import requests +from config import BYPASS_CLOUDFLARE from engine.base import BaseDownloader +from utils.http_client import get_http_client class InstagramDownload(BaseDownloader): @@ -44,7 +46,8 @@ def _setup_formats(self) -> list | None: def _download(self, formats=None): try: - resp = requests.get(f"http://instagram:15000/?url={self._url}").json() + client = get_http_client(bypass_enabled=BYPASS_CLOUDFLARE) + resp = client.get(f"http://instagram:15000/?url={self._url}").json() except Exception as e: self._bot_msg.edit_text(f"Download failed!❌\n\n`{e}`") pass @@ -69,7 +72,7 @@ def _download(self, formats=None): continue try: - req = requests.get(link, stream=True) + req = client.get(link, stream=True) length = int(req.headers.get("content-length", 0) or req.headers.get("x-full-image-content-length", 0)) filename = f"Instagram_{code}-{counter}" save_path = pathlib.Path(self._tempdir.name, filename) diff --git a/src/utils/http_client.py b/src/utils/http_client.py new file mode 100644 index 00000000..c5d21a12 --- /dev/null +++ b/src/utils/http_client.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +# ytdlbot - http_client.py +# HTTP client with Cloudflare bypass support + +__author__ = "Benny " + +import logging +from typing import Optional + +import cloudscraper +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + + +class BypassHTTPClient: + """HTTP client that automatically bypasses Cloudflare protection.""" + + def __init__(self, bypass_enabled: bool = True, timeout: int = 30): + self._bypass_enabled = bypass_enabled + self._timeout = timeout + self._session: Optional[requests.Session] = None + self._scraper: Optional[cloudscraper.CloudScraper] = None + + self._user_agent = ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" + ) + + self._headers = { + "User-Agent": self._user_agent, + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "DNT": "1", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + } + + def _create_scraper(self) -> cloudscraper.CloudScraper: + scraper = cloudscraper.create_scraper( + browser={ + "browser": "chrome", + "platform": "windows", + "desktop": True, + }, + delay=10, + ) + scraper.headers.update(self._headers) + return scraper + + def _create_session(self) -> requests.Session: + session = requests.Session() + session.headers.update(self._headers) + + retry_strategy = Retry( + total=3, + backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504], + ) + adapter = HTTPAdapter(max_retries=retry_strategy) + session.mount("http://", adapter) + session.mount("https://", adapter) + + return session + + def get(self, url: str, **kwargs) -> requests.Response: + """Make a GET request with Cloudflare bypass if needed.""" + kwargs.setdefault("timeout", self._timeout) + + if self._bypass_enabled: + return self._bypass_get(url, **kwargs) + return self._normal_get(url, **kwargs) + + def _bypass_get(self, url: str, **kwargs) -> requests.Response: + """Try Cloudflare bypass first, fallback to normal request.""" + try: + if self._scraper is None: + self._scraper = self._create_scraper() + + logging.debug("Attempting Cloudflare bypass for %s", url) + response = self._scraper.get(url, **kwargs) + + if response.status_code == 403 and "cloudflare" in response.text.lower(): + logging.warning("Cloudflare bypass failed, trying normal request") + return self._normal_get(url, **kwargs) + + return response + except Exception as e: + logging.warning("Cloudflare bypass error: %s, falling back to normal request", e) + return self._normal_get(url, **kwargs) + + def _normal_get(self, url: str, **kwargs) -> requests.Response: + """Make a normal GET request without bypass.""" + if self._session is None: + self._session = self._create_session() + + return self._session.get(url, **kwargs) + + def close(self): + """Close all sessions.""" + if self._session: + self._session.close() + self._session = None + if self._scraper: + self._scraper.close() + self._scraper = None + + +_client_instance: Optional[BypassHTTPClient] = None + + +def get_http_client(bypass_enabled: bool = True) -> BypassHTTPClient: + """Get or create a shared HTTP client instance.""" + global _client_instance + if _client_instance is None: + _client_instance = BypassHTTPClient(bypass_enabled=bypass_enabled) + return _client_instance + + +def get_cloudflare_bypass_session() -> cloudscraper.CloudScraper: + """Get a CloudScraper session for yt-dlp or other libraries.""" + scraper = cloudscraper.create_scraper( + browser={ + "browser": "chrome", + "platform": "windows", + "desktop": True, + }, + delay=10, + ) + return scraper + + +def get_realistic_headers() -> dict: + """Get realistic browser headers for manual use.""" + return { + "User-Agent": ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" + ), + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "DNT": "1", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + } From d188cf6d5e6111a359fb59e2ea9b95713c9c1ca7 Mon Sep 17 00:00:00 2001 From: yeshuaaguilar Date: Sat, 21 Feb 2026 21:39:42 -0500 Subject: [PATCH 2/5] feat: creacion del test bypass cloudflare --- src/test_cloudflare_bypass.py | 61 +++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 src/test_cloudflare_bypass.py diff --git a/src/test_cloudflare_bypass.py b/src/test_cloudflare_bypass.py new file mode 100644 index 00000000..e6d48d51 --- /dev/null +++ b/src/test_cloudflare_bypass.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +# ytdlbot - test_cloudflare_bypass.py +# Test script for Cloudflare bypass functionality + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent / "src")) + +from utils.http_client import BypassHTTPClient, get_realistic_headers + + +def test_cloudflare_bypass(): + print("=" * 50) + print("Testing Cloudflare Bypass") + print("=" * 50) + + test_sites = [ + ("https://nowsecure.nl/", "NowSecure (Cloudflare protected)"), + ("https://www.google.com/", "Google (no protection)"), + ] + + client = BypassHTTPClient(bypass_enabled=True) + + for url, description in test_sites: + print(f"\nTesting: {description}") + print(f"URL: {url}") + + try: + resp = client.get(url, timeout=10) + print(f"Status: {resp.status_code}") + print(f"Content length: {len(resp.text)} chars") + print("Result: OK") + except Exception as e: + print(f"Error: {e}") + print("Result: FAILED") + + client.close() + print("\n" + "=" * 50) + print("Test completed!") + print("=" * 50) + + +def test_headers(): + print("\n" + "=" * 50) + print("Testing Realistic Headers") + print("=" * 50) + + headers = get_realistic_headers() + + for key, value in headers.items(): + print(f"{key}: {value}") + + print("\nResult: OK") + + +if __name__ == "__main__": + test_cloudflare_bypass() + test_headers() From 8587f3b8ebc0501fa5bf5248d8a970b464d3648e Mon Sep 17 00:00:00 2001 From: yeshuaaguilar Date: Sat, 21 Feb 2026 21:55:13 -0500 Subject: [PATCH 3/5] feat: Add Cloudflare bypass support for direct downloads --- .env.example | 3 + pyproject.toml | 2 +- requirements.txt | 3 +- src/config/config.py | 1 + src/engine/direct.py | 6 +- src/engine/generic.py | 4 +- src/engine/instagram.py | 7 +- src/utils/http_client.py | 153 +++++++++++++++++++++++++++++++++++++++ 8 files changed, 172 insertions(+), 7 deletions(-) create mode 100644 src/utils/http_client.py diff --git a/.env.example b/.env.example index c46b9245..793b21f9 100644 --- a/.env.example +++ b/.env.example @@ -36,6 +36,9 @@ M3U8_SUPPORT=False # Enable Aria2 for downloads (True/False) ENABLE_ARIA2=False +# Enable Cloudflare bypass for direct downloads (True/False) +BYPASS_CLOUDFLARE=True + # Path to Rclone executable RCLONE_PATH= diff --git a/pyproject.toml b/pyproject.toml index 5aefbb20..dd236a83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "Default template for PDM package" authors = [ {name = "Benny", email = "benny.think@gmail.com"}, ] -dependencies = ["tgcrypto>=1.2.5", "yt-dlp[curl-cffi,default]==2026.1.31", "APScheduler>=3.11.2", "ffmpeg-python>=0.2.0", "PyMySQL>=1.1.1", "filetype>=1.2.0", "beautifulsoup4>=4.14.3", "fakeredis>=2.33.0", "redis==6.4.0", "requests>=2.32.5", "tqdm==4.67.2", "token-bucket>=0.3.0", "python-dotenv>=1.0.1", "black>=24.10.0", "sqlalchemy>=2.0.36", "psutil==7.2.2", "ffpb>=0.4.1", "kurigram==2.2.18", "cryptography>=46.0.4", "greenlet==3.3.1"] +dependencies = ["tgcrypto>=1.2.5", "yt-dlp[curl-cffi,default]==2026.1.31", "APScheduler>=3.11.2", "ffmpeg-python>=0.2.0", "PyMySQL>=1.1.1", "filetype>=1.2.0", "beautifulsoup4>=4.14.3", "fakeredis>=2.33.0", "redis==6.4.0", "requests>=2.32.5", "tqdm==4.67.2", "token-bucket>=0.3.0", "python-dotenv>=1.0.1", "black>=24.10.0", "sqlalchemy>=2.0.36", "psutil==7.2.2", "ffpb>=0.4.1", "kurigram==2.2.18", "cryptography>=46.0.4", "greenlet==3.3.1", "cloudscraper>=1.2.71"] requires-python = ">=3.10" readme = "README.md" license = {text = "Apache2.0"} diff --git a/requirements.txt b/requirements.txt index 54783940..d54dde76 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,5 @@ psutil>=7.2.2 ffpb>=0.4.1 cryptography>=46.0.4 kurigram==2.2.18 -yt-dlp[default,curl-cffi]==2026.1.31 \ No newline at end of file +yt-dlp[default,curl-cffi]==2026.1.31 +cloudscraper>=1.2.71 \ No newline at end of file diff --git a/src/config/config.py b/src/config/config.py index ccf130f9..85abdca4 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -39,6 +39,7 @@ def get_env(name: str, default=None): AUDIO_FORMAT = get_env("AUDIO_FORMAT", "m4a") M3U8_SUPPORT = get_env("M3U8_SUPPORT") ENABLE_ARIA2 = get_env("ENABLE_ARIA2") +BYPASS_CLOUDFLARE = get_env("BYPASS_CLOUDFLARE", True) RCLONE_PATH = get_env("RCLONE") diff --git a/src/engine/direct.py b/src/engine/direct.py index 0388ad45..8f2d6c73 100644 --- a/src/engine/direct.py +++ b/src/engine/direct.py @@ -15,8 +15,9 @@ import filetype import requests -from config import ENABLE_ARIA2, TMPFILE_PATH +from config import BYPASS_CLOUDFLARE, ENABLE_ARIA2, TMPFILE_PATH from engine.base import BaseDownloader +from utils.http_client import get_http_client class DirectDownload(BaseDownloader): @@ -40,7 +41,8 @@ def _setup_formats(self) -> list | None: def _requests_download(self): logging.info("Requests download with url %s", self._url) - response = requests.get(self._url, stream=True) + client = get_http_client(bypass_enabled=BYPASS_CLOUDFLARE) + response = client.get(self._url, stream=True) response.raise_for_status() file = Path(self._tempdir.name).joinpath(uuid4().hex) with open(file, "wb") as f: diff --git a/src/engine/generic.py b/src/engine/generic.py index bbcf3d7b..ab3d0864 100644 --- a/src/engine/generic.py +++ b/src/engine/generic.py @@ -9,10 +9,11 @@ import yt_dlp -from config import AUDIO_FORMAT +from config import AUDIO_FORMAT, BYPASS_CLOUDFLARE from utils import is_youtube from database.model import get_format_settings, get_quality_settings from engine.base import BaseDownloader +from utils.http_client import get_realistic_headers def match_filter(info_dict): @@ -103,6 +104,7 @@ def _download(self, formats) -> list: "embed_metadata": True, "embed_thumbnail": True, "writethumbnail": False, + "http_headers": get_realistic_headers(), } # setup cookies for youtube only if is_youtube(self._url): diff --git a/src/engine/instagram.py b/src/engine/instagram.py index 216c71e0..ae1f8dc8 100644 --- a/src/engine/instagram.py +++ b/src/engine/instagram.py @@ -9,7 +9,9 @@ import filetype import requests +from config import BYPASS_CLOUDFLARE from engine.base import BaseDownloader +from utils.http_client import get_http_client class InstagramDownload(BaseDownloader): @@ -44,7 +46,8 @@ def _setup_formats(self) -> list | None: def _download(self, formats=None): try: - resp = requests.get(f"http://instagram:15000/?url={self._url}").json() + client = get_http_client(bypass_enabled=BYPASS_CLOUDFLARE) + resp = client.get(f"http://instagram:15000/?url={self._url}").json() except Exception as e: self._bot_msg.edit_text(f"Download failed!❌\n\n`{e}`") pass @@ -69,7 +72,7 @@ def _download(self, formats=None): continue try: - req = requests.get(link, stream=True) + req = client.get(link, stream=True) length = int(req.headers.get("content-length", 0) or req.headers.get("x-full-image-content-length", 0)) filename = f"Instagram_{code}-{counter}" save_path = pathlib.Path(self._tempdir.name, filename) diff --git a/src/utils/http_client.py b/src/utils/http_client.py new file mode 100644 index 00000000..f6672fc8 --- /dev/null +++ b/src/utils/http_client.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +# ytdlbot - http_client.py +# HTTP client with Cloudflare bypass support + +__author__ = "yeshua-aguilar" + +import logging +from typing import Optional + +import cloudscraper +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + + +class BypassHTTPClient: + """HTTP client that automatically bypasses Cloudflare protection.""" + + def __init__(self, bypass_enabled: bool = True, timeout: int = 30): + self._bypass_enabled = bypass_enabled + self._timeout = timeout + self._session: Optional[requests.Session] = None + self._scraper: Optional[cloudscraper.CloudScraper] = None + + self._user_agent = ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" + ) + + self._headers = { + "User-Agent": self._user_agent, + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "DNT": "1", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + } + + def _create_scraper(self) -> cloudscraper.CloudScraper: + scraper = cloudscraper.create_scraper( + browser={ + "browser": "chrome", + "platform": "windows", + "desktop": True, + }, + delay=10, + ) + scraper.headers.update(self._headers) + return scraper + + def _create_session(self) -> requests.Session: + session = requests.Session() + session.headers.update(self._headers) + + retry_strategy = Retry( + total=3, + backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504], + ) + adapter = HTTPAdapter(max_retries=retry_strategy) + session.mount("http://", adapter) + session.mount("https://", adapter) + + return session + + def get(self, url: str, **kwargs) -> requests.Response: + """Make a GET request with Cloudflare bypass if needed.""" + kwargs.setdefault("timeout", self._timeout) + + if self._bypass_enabled: + return self._bypass_get(url, **kwargs) + return self._normal_get(url, **kwargs) + + def _bypass_get(self, url: str, **kwargs) -> requests.Response: + """Try Cloudflare bypass first, fallback to normal request.""" + try: + if self._scraper is None: + self._scraper = self._create_scraper() + + logging.debug("Attempting Cloudflare bypass for %s", url) + response = self._scraper.get(url, **kwargs) + + if response.status_code == 403 and "cloudflare" in response.text.lower(): + logging.warning("Cloudflare bypass failed, trying normal request") + return self._normal_get(url, **kwargs) + + return response + except Exception as e: + logging.warning("Cloudflare bypass error: %s, falling back to normal request", e) + return self._normal_get(url, **kwargs) + + def _normal_get(self, url: str, **kwargs) -> requests.Response: + """Make a normal GET request without bypass.""" + if self._session is None: + self._session = self._create_session() + + return self._session.get(url, **kwargs) + + def close(self): + """Close all sessions.""" + if self._session: + self._session.close() + self._session = None + if self._scraper: + self._scraper.close() + self._scraper = None + + +_client_instance: Optional[BypassHTTPClient] = None + + +def get_http_client(bypass_enabled: bool = True) -> BypassHTTPClient: + """Get or create a shared HTTP client instance.""" + global _client_instance + if _client_instance is None: + _client_instance = BypassHTTPClient(bypass_enabled=bypass_enabled) + return _client_instance + + +def get_cloudflare_bypass_session() -> cloudscraper.CloudScraper: + """Get a CloudScraper session for yt-dlp or other libraries.""" + scraper = cloudscraper.create_scraper( + browser={ + "browser": "chrome", + "platform": "windows", + "desktop": True, + }, + delay=10, + ) + return scraper + + +def get_realistic_headers() -> dict: + """Get realistic browser headers for manual use.""" + return { + "User-Agent": ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" + ), + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "DNT": "1", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + } From 9dbb3670ffc43958f4a06630282ce3cd4cb6c588 Mon Sep 17 00:00:00 2001 From: yeshuaaguilar Date: Sat, 21 Feb 2026 21:55:21 -0500 Subject: [PATCH 4/5] feat: add test for Cloudflare bypass functionality --- src/test_cloudflare_bypass.py | 61 +++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 src/test_cloudflare_bypass.py diff --git a/src/test_cloudflare_bypass.py b/src/test_cloudflare_bypass.py new file mode 100644 index 00000000..e6d48d51 --- /dev/null +++ b/src/test_cloudflare_bypass.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +# ytdlbot - test_cloudflare_bypass.py +# Test script for Cloudflare bypass functionality + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent / "src")) + +from utils.http_client import BypassHTTPClient, get_realistic_headers + + +def test_cloudflare_bypass(): + print("=" * 50) + print("Testing Cloudflare Bypass") + print("=" * 50) + + test_sites = [ + ("https://nowsecure.nl/", "NowSecure (Cloudflare protected)"), + ("https://www.google.com/", "Google (no protection)"), + ] + + client = BypassHTTPClient(bypass_enabled=True) + + for url, description in test_sites: + print(f"\nTesting: {description}") + print(f"URL: {url}") + + try: + resp = client.get(url, timeout=10) + print(f"Status: {resp.status_code}") + print(f"Content length: {len(resp.text)} chars") + print("Result: OK") + except Exception as e: + print(f"Error: {e}") + print("Result: FAILED") + + client.close() + print("\n" + "=" * 50) + print("Test completed!") + print("=" * 50) + + +def test_headers(): + print("\n" + "=" * 50) + print("Testing Realistic Headers") + print("=" * 50) + + headers = get_realistic_headers() + + for key, value in headers.items(): + print(f"{key}: {value}") + + print("\nResult: OK") + + +if __name__ == "__main__": + test_cloudflare_bypass() + test_headers() From ecb5f3383ebe23f7af9c8d82590d994397b36604 Mon Sep 17 00:00:00 2001 From: yeshuaaguilar Date: Sun, 22 Feb 2026 15:57:20 -0500 Subject: [PATCH 5/5] refactor: switch to ai-cloudscraper, scope bypass to direct.py only --- pyproject.toml | 2 +- requirements.txt | 2 +- src/engine/generic.py | 4 +--- src/engine/instagram.py | 7 ++----- src/test_cloudflare_bypass.py | 30 +++++++++++++++++++++++++++--- src/utils/http_client.py | 2 +- 6 files changed, 33 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index dd236a83..9b7b2c90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ description = "Default template for PDM package" authors = [ {name = "Benny", email = "benny.think@gmail.com"}, ] -dependencies = ["tgcrypto>=1.2.5", "yt-dlp[curl-cffi,default]==2026.1.31", "APScheduler>=3.11.2", "ffmpeg-python>=0.2.0", "PyMySQL>=1.1.1", "filetype>=1.2.0", "beautifulsoup4>=4.14.3", "fakeredis>=2.33.0", "redis==6.4.0", "requests>=2.32.5", "tqdm==4.67.2", "token-bucket>=0.3.0", "python-dotenv>=1.0.1", "black>=24.10.0", "sqlalchemy>=2.0.36", "psutil==7.2.2", "ffpb>=0.4.1", "kurigram==2.2.18", "cryptography>=46.0.4", "greenlet==3.3.1", "cloudscraper>=1.2.71"] +dependencies = ["tgcrypto>=1.2.5", "yt-dlp[curl-cffi,default]==2026.1.31", "APScheduler>=3.11.2", "ffmpeg-python>=0.2.0", "PyMySQL>=1.1.1", "filetype>=1.2.0", "beautifulsoup4>=4.14.3", "fakeredis>=2.33.0", "redis==6.4.0", "requests>=2.32.5", "tqdm==4.67.2", "token-bucket>=0.3.0", "python-dotenv>=1.0.1", "black>=24.10.0", "sqlalchemy>=2.0.36", "psutil==7.2.2", "ffpb>=0.4.1", "kurigram==2.2.18", "cryptography>=46.0.4", "greenlet==3.3.1", "ai-cloudscraper>=3.8.4"] requires-python = ">=3.10" readme = "README.md" license = {text = "Apache2.0"} diff --git a/requirements.txt b/requirements.txt index d54dde76..96cc9b20 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,4 +17,4 @@ ffpb>=0.4.1 cryptography>=46.0.4 kurigram==2.2.18 yt-dlp[default,curl-cffi]==2026.1.31 -cloudscraper>=1.2.71 \ No newline at end of file +ai-cloudscraper>=3.8.4 \ No newline at end of file diff --git a/src/engine/generic.py b/src/engine/generic.py index ab3d0864..bbcf3d7b 100644 --- a/src/engine/generic.py +++ b/src/engine/generic.py @@ -9,11 +9,10 @@ import yt_dlp -from config import AUDIO_FORMAT, BYPASS_CLOUDFLARE +from config import AUDIO_FORMAT from utils import is_youtube from database.model import get_format_settings, get_quality_settings from engine.base import BaseDownloader -from utils.http_client import get_realistic_headers def match_filter(info_dict): @@ -104,7 +103,6 @@ def _download(self, formats) -> list: "embed_metadata": True, "embed_thumbnail": True, "writethumbnail": False, - "http_headers": get_realistic_headers(), } # setup cookies for youtube only if is_youtube(self._url): diff --git a/src/engine/instagram.py b/src/engine/instagram.py index ae1f8dc8..216c71e0 100644 --- a/src/engine/instagram.py +++ b/src/engine/instagram.py @@ -9,9 +9,7 @@ import filetype import requests -from config import BYPASS_CLOUDFLARE from engine.base import BaseDownloader -from utils.http_client import get_http_client class InstagramDownload(BaseDownloader): @@ -46,8 +44,7 @@ def _setup_formats(self) -> list | None: def _download(self, formats=None): try: - client = get_http_client(bypass_enabled=BYPASS_CLOUDFLARE) - resp = client.get(f"http://instagram:15000/?url={self._url}").json() + resp = requests.get(f"http://instagram:15000/?url={self._url}").json() except Exception as e: self._bot_msg.edit_text(f"Download failed!❌\n\n`{e}`") pass @@ -72,7 +69,7 @@ def _download(self, formats=None): continue try: - req = client.get(link, stream=True) + req = requests.get(link, stream=True) length = int(req.headers.get("content-length", 0) or req.headers.get("x-full-image-content-length", 0)) filename = f"Instagram_{code}-{counter}" save_path = pathlib.Path(self._tempdir.name, filename) diff --git a/src/test_cloudflare_bypass.py b/src/test_cloudflare_bypass.py index e6d48d51..e7221dd9 100644 --- a/src/test_cloudflare_bypass.py +++ b/src/test_cloudflare_bypass.py @@ -2,19 +2,19 @@ # coding: utf-8 # ytdlbot - test_cloudflare_bypass.py -# Test script for Cloudflare bypass functionality +# Test script for Cloudflare bypass functionality using ai-cloudscraper import sys from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent / "src")) +sys.path.insert(0, str(Path(__file__).parent)) from utils.http_client import BypassHTTPClient, get_realistic_headers def test_cloudflare_bypass(): print("=" * 50) - print("Testing Cloudflare Bypass") + print("Testing Cloudflare Bypass (ai-cloudscraper)") print("=" * 50) test_sites = [ @@ -56,6 +56,30 @@ def test_headers(): print("\nResult: OK") +def test_direct_download_usage(): + print("\n" + "=" * 50) + print("Testing DirectDownload Usage Pattern") + print("=" * 50) + + from utils.http_client import get_http_client + + client = get_http_client(bypass_enabled=True) + + print("\nTesting single instance pattern:") + print(f"Client type: {type(client).__name__}") + + try: + resp = client.get("https://httpbin.org/headers", timeout=5) + print(f"Status: {resp.status_code}") + print("Result: OK") + except Exception as e: + print(f"Error: {e}") + print("Result: FAILED") + + print("\n" + "=" * 50) + + if __name__ == "__main__": test_cloudflare_bypass() test_headers() + test_direct_download_usage() diff --git a/src/utils/http_client.py b/src/utils/http_client.py index f6672fc8..dcee388b 100644 --- a/src/utils/http_client.py +++ b/src/utils/http_client.py @@ -2,7 +2,7 @@ # coding: utf-8 # ytdlbot - http_client.py -# HTTP client with Cloudflare bypass support +# HTTP client with Cloudflare bypass support using ai-cloudscraper __author__ = "yeshua-aguilar"