Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions vlmeval/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from .together import TogetherAPI
from .gcp_vertex import GCPVertexAPI
from .bedrock import BedrockAPI
from .deepocr_api import DeepOCRAPI

__all__ = [
'OpenAIWrapper', 'HFChatModel', 'GeminiWrapper', 'GPT4V', 'Gemini',
Expand All @@ -40,4 +41,5 @@
'RBdashMMChat3_API', 'RBdashChat3_5_API', 'RBdashMMChat3_78B_API', 'RBdashMMChat3_5_38B_API',
'VideoChatOnlineV2API', 'TeleMM2_API', 'TeleMM2Thinking_API',
'TogetherAPI', 'GCPVertexAPI', 'BedrockAPI',
'DeepOCRAPI',
]
51 changes: 51 additions & 0 deletions vlmeval/api/deepocr_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from __future__ import annotations

import os

from .gpt import OpenAIWrapper


class DeepOCRAPI(OpenAIWrapper):
    """OpenAI-compatible API wrapper for DeepOCR pipeline endpoint.

    Credentials and endpoint are provided only via environment variables:
    - DEEPOCR_API_BASE
    - DEEPOCR_API_KEY

    The class adds no request logic of its own: it reads the two variables
    above and hands them, together with every constructor argument, to
    ``OpenAIWrapper.__init__``.
    """

    is_api: bool = True

    def __init__(
        self,
        model: str = "deepocr",
        retry: int = 5,
        verbose: bool = False,
        system_prompt: str | None = None,
        temperature: float = 0,
        timeout: int = 300,
        max_tokens: int = 2048,
        img_size: int = -1,
        img_detail: str = "high",
        **kwargs,
    ):
        """Read the endpoint and key from the environment and initialise the parent.

        All named parameters and ``**kwargs`` are forwarded unchanged to
        ``OpenAIWrapper.__init__``; their meaning is defined there.
        ``key`` and ``api_base`` are always taken from the environment, so
        they must not be passed through ``**kwargs``.

        Raises:
            ValueError: If ``DEEPOCR_API_BASE`` or ``DEEPOCR_API_KEY`` is
                unset, empty, or contains only whitespace.
        """
        # Strip surrounding whitespace so a blank-but-non-empty value is
        # rejected here instead of being sent to the parent as a credential.
        api_base = os.getenv("DEEPOCR_API_BASE", "").strip()
        api_key = os.getenv("DEEPOCR_API_KEY", "").strip()

        required = (
            ("DEEPOCR_API_BASE", api_base),
            ("DEEPOCR_API_KEY", api_key),
        )
        missing = [name for name, value in required if not value]
        if missing:
            # Keep the original sentence as a prefix and append the exact
            # variable(s) at fault so the misconfiguration is easy to spot.
            raise ValueError(
                "DEEPOCR_API_BASE and DEEPOCR_API_KEY must be set in the environment."
                f" Missing or empty: {', '.join(missing)}."
            )

        super().__init__(
            model=model,
            retry=retry,
            key=api_key,
            verbose=verbose,
            system_prompt=system_prompt,
            temperature=temperature,
            timeout=timeout,
            api_base=api_base,
            max_tokens=max_tokens,
            img_size=img_size,
            img_detail=img_detail,
            **kwargs,
        )
9 changes: 9 additions & 0 deletions vlmeval/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,15 @@
retry=10,
verbose=False,
),
"DEEPOCR": partial(
DeepOCRAPI,
model="gpt-4-1106-vision-preview",
temperature=0,
img_size=-1,
img_detail="high",
retry=10,
verbose=False,
),
"GPT4V_20240409": partial(
GPT4V,
model="gpt-4-turbo-2024-04-09",
Expand Down