# Detector API
This guide covers how to use the text detection API separately from OCR recognition.
Kiri OCR provides a TextDetector class that can detect text regions in document images. It supports multiple detection backends:
- DB (DBNet): Fast and accurate neural-network based detection (default)
- CRAFT: Character Region Awareness detector
- Legacy: Classic computer vision approach (no model required)
from kiri_ocr.detector import TextDetector
# Initialize detector (auto-downloads model from HuggingFace)
detector = TextDetector(method='db')
# Detect text lines
boxes = detector.detect_lines("document.jpg")
for (x, y, w, h) in boxes:
    print(f"Found text at: x={x}, y={y}, width={w}, height={h}")

from kiri_ocr.detector import TextDetector
# DB detector (recommended)
detector = TextDetector(method='db')
# CRAFT detector
detector = TextDetector(method='craft')
# Legacy (classic CV, no model needed)
detector = TextDetector(method='legacy')

# Local model file
detector = TextDetector(method='db', model_path='models/detector.onnx')
# HuggingFace model
detector = TextDetector(method='db', model_path='mrrtmob/kiri-ocr')

detector = TextDetector(
method='db',
model_path='mrrtmob/kiri-ocr',
conf_threshold=0.25, # Confidence threshold
det_db_thresh=0.3, # Binary threshold
det_db_box_thresh=0.5, # Box threshold
det_db_unclip_ratio=1.5, # Box expansion ratio
max_side_len=960, # Max image dimension
padding=5, # Padding around boxes (pixels)
)

Detect text lines and return bounding boxes.
# Returns list of (x, y, width, height) tuples
boxes = detector.detect_lines("document.jpg")
for x, y, w, h in boxes:
    print(f"Line at ({x}, {y}) - {w}x{h}")

Detect text lines and return TextBox objects with confidence scores.
# Returns list of TextBox objects
boxes = detector.detect_lines_objects("document.jpg")
for box in boxes:
    print(f"Line at ({box.x}, {box.y}) - confidence: {box.confidence:.2%}")
    print(f"  BBox: {box.bbox}")    # (x, y, w, h)
    print(f"  XYXY: {box.xyxy}")    # (x1, y1, x2, y2)
    print(f"  Area: {box.area}")
    print(f"  Center: {box.center}")

Detect individual words.
words = detector.detect_words("document.jpg")
for x, y, w, h in words:
    print(f"Word at ({x}, {y})")

Detect text blocks (paragraphs).
blocks = detector.detect_blocks("document.jpg")
for x, y, w, h in blocks:
    print(f"Block at ({x}, {y})")

Get full detection hierarchy (blocks → lines → words).
from kiri_ocr.detector import TextBox, DetectionLevel
all_boxes = detector.detect_all("document.jpg")
for box in all_boxes:
    if box.level == DetectionLevel.BLOCK:
        print(f"Block at ({box.x}, {box.y})")
        for line in box.children:
            print(f"  Line: {line.bbox}")

Check if image contains multiple text lines.
if detector.is_multiline("image.jpg"):
    print("Document has multiple lines")
else:
    print("Single line image")

The TextBox class provides rich information about detected regions:
from kiri_ocr.detector import TextBox, DetectionLevel
# TextBox properties
box = TextBox(x=10, y=20, width=100, height=30, confidence=0.95)
box.x # X coordinate
box.y # Y coordinate
box.width # Width
box.height # Height
box.confidence # Detection confidence (0.0-1.0)
box.level # DetectionLevel enum (LINE, WORD, BLOCK, etc.)
# Computed properties
box.bbox # (x, y, width, height)
box.xyxy # (x1, y1, x2, y2)
box.area # width * height
box.center # (center_x, center_y)
box.baseline_y  # Approximate text baseline

For quick one-off detection:
from kiri_ocr.detector import detect_text_lines, detect_text_words, detect_text_blocks
# Detect lines
lines = detect_text_lines("document.jpg", method='db')
# Detect words
words = detect_text_words("document.jpg", method='legacy')
# Detect blocks
blocks = detect_text_blocks("document.jpg", method='db')

The detector accepts both file paths and NumPy arrays:
import cv2
import numpy as np
# Load image with OpenCV
img = cv2.imread("document.jpg")
# Detect text
boxes = detector.detect_lines(img)

Use the detector separately from recognition:
from kiri_ocr import OCR
from kiri_ocr.detector import TextDetector
import cv2
# Initialize separately
detector = TextDetector(method='db')
ocr = OCR()
# Load image
img = cv2.imread("document.jpg")
# Custom detection
boxes = detector.detect_lines_objects(img)
# Filter by confidence
high_conf_boxes = [b for b in boxes if b.confidence > 0.7]
# Recognize each box
for box in high_conf_boxes:
    x, y, w, h = box.bbox
    crop = img[y:y+h, x:x+w]
    # Convert to PIL for recognition
    from PIL import Image
    pil_crop = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
    text, conf = ocr.recognize_single_line_image(pil_crop)
    print(f"[{box.confidence:.0%}] {text}")

Draw detection boxes on image:
import cv2
img = cv2.imread("document.jpg")
boxes = detector.detect_lines_objects(img)
for box in boxes:
    x, y, w, h = box.bbox
    color = (0, 255, 0) if box.confidence > 0.5 else (0, 165, 255)
    cv2.rectangle(img, (x, y), (x+w, y+h), color, 2)
    cv2.putText(img, f"{box.confidence:.0%}", (x, y-5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
cv2.imwrite("detection_result.jpg", img)

| Parameter | Default | Description |
|---|---|---|
| `det_db_thresh` | 0.3 | Binary threshold for text detection |
| `det_db_box_thresh` | 0.5 | Box confidence threshold |
| `det_db_unclip_ratio` | 1.5 | Box expansion ratio |
| `max_side_len` | 960 | Maximum image dimension |
| `min_size` | 3 | Minimum box size |
| `use_gpu` | False | Use GPU for inference |
from kiri_ocr.detector import TextDetector
try:
    detector = TextDetector(method='db')
    boxes = detector.detect_lines("nonexistent.jpg")
except Exception as e:
    print(f"Detection failed: {e}")

If a detector method fails, it automatically falls back to the legacy detector:
# If DB model not found, falls back to legacy automatically
detector = TextDetector(method='db', model_path='invalid/path.onnx')
boxes = detector.detect_lines("document.jpg")  # Uses legacy detector

Kiri OCR Home | GitHub Repository | Report Issue
© 2026 Kiri OCR. Released under the Apache 2.0 License.