Skip to content

Detector API

Tmob edited this page Jan 28, 2026 · 1 revision

Detector Python API

This guide covers how to use the text detection API on its own, independently of text recognition.

Overview

Kiri OCR provides a TextDetector class that can detect text regions in document images. It supports multiple detection backends:

  • DB (DBNet): Fast and accurate neural-network based detection (default)
  • CRAFT: Character Region Awareness detector
  • Legacy: Classic computer vision approach (no model required)

Quick Start

from kiri_ocr.detector import TextDetector

# Initialize detector (auto-downloads model from HuggingFace)
detector = TextDetector(method='db')

# Detect text lines
boxes = detector.detect_lines("document.jpg")
for (x, y, w, h) in boxes:
    print(f"Found text at: x={x}, y={y}, width={w}, height={h}")

Initialization Options

Basic Usage

from kiri_ocr.detector import TextDetector

# DB detector (recommended)
detector = TextDetector(method='db')

# CRAFT detector
detector = TextDetector(method='craft')

# Legacy (classic CV, no model needed)
detector = TextDetector(method='legacy')

With Custom Model Path

# Local model file
detector = TextDetector(method='db', model_path='models/detector.onnx')

# HuggingFace model
detector = TextDetector(method='db', model_path='mrrtmob/kiri-ocr')

With Configuration Options

detector = TextDetector(
    method='db',
    model_path='mrrtmob/kiri-ocr',
    conf_threshold=0.25,       # Confidence threshold
    det_db_thresh=0.3,         # Binary threshold
    det_db_box_thresh=0.5,     # Box threshold
    det_db_unclip_ratio=1.5,   # Box expansion ratio
    max_side_len=960,          # Max image dimension
    padding=5,                 # Padding around boxes (pixels)
)

Detection Methods

detect_lines()

Detect text lines and return bounding boxes.

# Returns list of (x, y, width, height) tuples
boxes = detector.detect_lines("document.jpg")

for x, y, w, h in boxes:
    print(f"Line at ({x}, {y}) - {w}x{h}")

detect_lines_objects()

Detect text lines and return TextBox objects with confidence scores.

# Returns list of TextBox objects
boxes = detector.detect_lines_objects("document.jpg")

for box in boxes:
    print(f"Line at ({box.x}, {box.y}) - confidence: {box.confidence:.2%}")
    print(f"  BBox: {box.bbox}")        # (x, y, w, h)
    print(f"  XYXY: {box.xyxy}")        # (x1, y1, x2, y2)
    print(f"  Area: {box.area}")
    print(f"  Center: {box.center}")

detect_words()

Detect individual words.

words = detector.detect_words("document.jpg")
for x, y, w, h in words:
    print(f"Word at ({x}, {y})")

detect_blocks()

Detect text blocks (paragraphs).

blocks = detector.detect_blocks("document.jpg")
for x, y, w, h in blocks:
    print(f"Block at ({x}, {y})")

detect_all()

Get the full detection hierarchy (blocks → lines → words).

from kiri_ocr.detector import TextBox, DetectionLevel

all_boxes = detector.detect_all("document.jpg")
for box in all_boxes:
    if box.level == DetectionLevel.BLOCK:
        print(f"Block at ({box.x}, {box.y})")
        for line in box.children:
            print(f"  Line: {line.bbox}")

is_multiline()

Check whether an image contains multiple text lines.

if detector.is_multiline("image.jpg"):
    print("Document has multiple lines")
else:
    print("Single line image")

TextBox Object

The TextBox class provides rich information about detected regions:

from kiri_ocr.detector import TextBox, DetectionLevel

# TextBox properties
box = TextBox(x=10, y=20, width=100, height=30, confidence=0.95)

box.x             # X coordinate
box.y             # Y coordinate
box.width         # Width
box.height        # Height
box.confidence    # Detection confidence (0.0-1.0)
box.level         # DetectionLevel enum (LINE, WORD, BLOCK, etc.)

# Computed properties
box.bbox          # (x, y, width, height)
box.xyxy          # (x1, y1, x2, y2)
box.area          # width * height
box.center        # (center_x, center_y)
box.baseline_y    # Approximate text baseline

Convenience Functions

For quick one-off detection:

from kiri_ocr.detector import detect_text_lines, detect_text_words, detect_text_blocks

# Detect lines
lines = detect_text_lines("document.jpg", method='db')

# Detect words
words = detect_text_words("document.jpg", method='legacy')

# Detect blocks
blocks = detect_text_blocks("document.jpg", method='db')

Using with NumPy Arrays

The detector accepts both file paths and NumPy arrays:

import cv2
import numpy as np

# Load image with OpenCV
img = cv2.imread("document.jpg")

# Detect text
boxes = detector.detect_lines(img)

Integration with OCR

Use the detector separately from recognition:

from kiri_ocr import OCR
from kiri_ocr.detector import TextDetector
import cv2

# Initialize separately
detector = TextDetector(method='db')
ocr = OCR()

# Load image
img = cv2.imread("document.jpg")

# Custom detection
boxes = detector.detect_lines_objects(img)

# Filter by confidence
high_conf_boxes = [b for b in boxes if b.confidence > 0.7]

# Recognize each box
for box in high_conf_boxes:
    x, y, w, h = box.bbox
    crop = img[y:y+h, x:x+w]
    
    # Convert to PIL for recognition
    from PIL import Image
    pil_crop = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
    
    text, conf = ocr.recognize_single_line_image(pil_crop)
    print(f"[{box.confidence:.0%}] {text}")

Visualization

Draw detection boxes on an image:

import cv2

img = cv2.imread("document.jpg")
boxes = detector.detect_lines_objects(img)

for box in boxes:
    x, y, w, h = box.bbox
    color = (0, 255, 0) if box.confidence > 0.5 else (0, 165, 255)
    cv2.rectangle(img, (x, y), (x+w, y+h), color, 2)
    cv2.putText(img, f"{box.confidence:.0%}", (x, y-5), 
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

cv2.imwrite("detection_result.jpg", img)

DB Detector Parameters

| Parameter           | Default | Description                         |
|---------------------|---------|-------------------------------------|
| det_db_thresh       | 0.3     | Binary threshold for text detection |
| det_db_box_thresh   | 0.5     | Box confidence threshold            |
| det_db_unclip_ratio | 1.5     | Box expansion ratio                 |
| max_side_len        | 960     | Maximum image dimension             |
| min_size            | 3       | Minimum box size                    |
| use_gpu             | False   | Use GPU for inference               |

Error Handling

from kiri_ocr.detector import TextDetector

try:
    detector = TextDetector(method='db')
    boxes = detector.detect_lines("nonexistent.jpg")
except Exception as e:
    print(f"Detection failed: {e}")

If the requested detection backend cannot be used (for example, the DB model is not found), TextDetector automatically falls back to the legacy detector:

# If DB model not found, falls back to legacy automatically
detector = TextDetector(method='db', model_path='invalid/path.onnx')
boxes = detector.detect_lines("document.jpg")  # Uses legacy detector

Clone this wiki locally