open-compass · leejooan · Mar 4, 2026 · Mar 4, 2026
diff --git a/vlmeval/api/__init__.py b/vlmeval/api/__init__.py
@@ -29,6 +29,7 @@
 from .together import TogetherAPI
 from .gcp_vertex import GCPVertexAPI
 from .bedrock import BedrockAPI
+from .deepocr_api import DeepOCRAPI
 
 __all__ = [
     'OpenAIWrapper', 'HFChatModel', 'GeminiWrapper', 'GPT4V', 'Gemini',
@@ -40,4 +41,5 @@
     'RBdashMMChat3_API', 'RBdashChat3_5_API', 'RBdashMMChat3_78B_API', 'RBdashMMChat3_5_38B_API',
     'VideoChatOnlineV2API', 'TeleMM2_API', 'TeleMM2Thinking_API',
     'TogetherAPI', 'GCPVertexAPI', 'BedrockAPI',
+    'DeepOCRAPI',
 ]
diff --git a/vlmeval/api/deepocr_api.py b/vlmeval/api/deepocr_api.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+import os
+
+from .gpt import OpenAIWrapper
+
+
+class DeepOCRAPI(OpenAIWrapper):
+    """OpenAI-compatible API wrapper for the DeepOCR pipeline endpoint.
+
+    The endpoint and key are read only from the environment variables
+    DEEPOCR_API_BASE and DEEPOCR_API_KEY (surrounding whitespace is ignored);
+    every other argument is passed unchanged to the parent constructor.
+
+    Raises:
+        ValueError: if either variable is unset, empty, or whitespace-only.
+    """
+
+    is_api: bool = True
+
+    def __init__(
+        self,
+        model: str = "deepocr",
+        retry: int = 5,
+        verbose: bool = False,
+        system_prompt: str | None = None,
+        temperature: float = 0,
+        timeout: int = 300,
+        max_tokens: int = 2048,
+        img_size: int = -1,
+        img_detail: str = "high",
+        **kwargs,
+    ):
+        # Strip first so a padded value (e.g. a trailing newline) is neither
+        # accepted as "set" nor forwarded verbatim to the parent wrapper.
+        api_base = os.getenv("DEEPOCR_API_BASE", "").strip()
+        api_key = os.getenv("DEEPOCR_API_KEY", "").strip()
+        pairs = (("DEEPOCR_API_BASE", api_base), ("DEEPOCR_API_KEY", api_key))
+        missing = [name for name, value in pairs if not value]
+        if missing:
+            raise ValueError(
+                "DEEPOCR_API_BASE and DEEPOCR_API_KEY must be set in the environment."
+                f" Missing or empty: {', '.join(missing)}."
+            )
+
+        super().__init__(
+            model=model, retry=retry, key=api_key, verbose=verbose,
+            system_prompt=system_prompt, temperature=temperature, timeout=timeout,
+            api_base=api_base, max_tokens=max_tokens, img_size=img_size,
+            img_detail=img_detail, **kwargs,
+        )
diff --git a/vlmeval/config.py b/vlmeval/config.py
@@ -144,6 +144,15 @@
         retry=10,
         verbose=False,
     ),
+    "DEEPOCR": partial(
+        DeepOCRAPI,
+        model="gpt-4-1106-vision-preview",  # NOTE(review): GPT-4V name overrides the "deepocr" default — confirm
+        temperature=0,
+        img_size=-1,
+        img_detail="high",
+        retry=10,
+        verbose=False,
+    ),
     "GPT4V_20240409": partial(
         GPT4V,
         model="gpt-4-turbo-2024-04-09",