NanoNets
diff --git a/README.md b/README.md
Lines changed: 445 additions & 289 deletions
diff --git a/api.md b/api.md
Lines changed: 396 additions & 27 deletions
diff --git a/examples/README.md b/examples/README.md
Lines changed: 83 additions & 0 deletions
diff --git a/examples/async_with_polling.py b/examples/async_with_polling.py
Lines changed: 103 additions & 0 deletions
diff --git a/examples/basic_extraction.py b/examples/basic_extraction.py
Lines changed: 58 additions & 0 deletions
diff --git a/examples/document_classification.py b/examples/document_classification.py
Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,83 @@
+# Docstrange Python SDK Examples
+
+This directory contains practical examples demonstrating common use cases for the Docstrange Python SDK.
+
+## Prerequisites
+
+1. Install the SDK:
+   ```bash
+   pip install docstrange-api
+   ```
+
+2. Set your API key:
+   ```bash
+   export DOCSTRANGE_API_KEY="your-api-key"
+   ```
+
+## Examples
+
+### Basic Extraction
+**File:** `basic_extraction.py`
+
+The simplest way to extract content from a document as Markdown.
+
+```bash
+python basic_extraction.py invoice.pdf
+```
+
+### JSON Extraction
+**File:** `json_extraction.py`
+
+Extract structured data with specific fields from invoices or forms.
+
+```bash
+python json_extraction.py invoice.pdf
+```
+
+### Streaming Extraction
+**File:** `streaming_extraction.py`
+
+Real-time extraction using Server-Sent Events. Content is displayed progressively as it's generated.
+
+```bash
+python streaming_extraction.py large_document.pdf
+```
+
+### Async with Polling
+**File:** `async_with_polling.py`
+
+Submit a document for async processing and poll for completion. Useful for large documents.
+
+```bash
+python async_with_polling.py large_document.pdf
+```
+
+### Document Classification
+**File:** `document_classification.py`
+
+Classify documents into custom categories with confidence scores.
+
+```bash
+python document_classification.py unknown_document.pdf
+```
+
+## Running All Examples
+
+```bash
+# Make examples executable
+chmod +x *.py
+
+# Run with a test document
+./basic_extraction.py test.pdf
+./json_extraction.py invoice.pdf
+./streaming_extraction.py document.pdf
+./document_classification.py document.pdf
+./async_with_polling.py large_doc.pdf
+```
+
+## Tips
+
+- **Input methods**: Examples use `file=` (binary), `file_base64=`, or `file_url=` depending on the use case
+- **Multiple formats**: Request multiple outputs with `output_format="markdown,json"`
+- **Metadata**: Add `include_metadata="bounding_boxes"` for coordinate data
+- **Custom extraction**: Use `custom_instructions` for specific extraction requirements
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+"""
+Async Extraction with Polling Example
+
+This example demonstrates asynchronous document processing with polling.
+Useful for large documents that take longer to process.
+
+Usage:
+    export DOCSTRANGE_API_KEY="your-api-key"
+    python async_with_polling.py document.pdf
+"""
+
+import base64
+import sys
+import time
+from pathlib import Path
+
+from docstrange import Docstrange
+
+
+def poll_until_complete(client: Docstrange, record_id: str, timeout: int = 120):
+    """Poll for extraction completion with timeout."""
+    start_time = time.time()
+    poll_interval = 2  # seconds
+
+    while time.time() - start_time < timeout:
+        result = client.extract.results.retrieve(record_id)
+
+        if result.status == "completed":
+            return result
+        elif result.status == "failed":
+            raise Exception(f"Extraction failed: {result.message}")
+
+        # Show progress
+        elapsed = int(time.time() - start_time)
+        print(f"  Status: {result.status} (elapsed: {elapsed}s)", end="\r")
+
+        time.sleep(poll_interval)
+
+    raise TimeoutError(f"Extraction did not complete within {timeout} seconds")
+
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: python async_with_polling.py <file_path>")
+        sys.exit(1)
+
+    file_path = Path(sys.argv[1])
+    if not file_path.exists():
+        print(f"Error: File not found: {file_path}")
+        sys.exit(1)
+
+    client = Docstrange()
+
+    # Read file as base64
+    with open(file_path, "rb") as f:
+        file_base64 = base64.b64encode(f.read()).decode("utf-8")
+
+    print(f"Submitting for async processing: {file_path.name}")
+    print("-" * 50)
+
+    # Submit for async processing
+    job = client.extract.async_(
+        file_base64=file_base64,
+        output_format="markdown,json",  # Multiple formats
+    )
+
+    print(f"✓ Job submitted")
+    print(f"  Record ID: {job.record_id}")
+    print(f"  Status: {job.status}")
+    print()
+    print("Polling for completion...")
+
+    try:
+        result = poll_until_complete(client, job.record_id, timeout=120)
+        print()  # Clear the status line
+        print(f"\n✓ Extraction completed in {result.processing_time:.2f}s")
+        print(f"  Pages processed: {result.pages_processed}")
+
+        # Show results
+        print("\n" + "=" * 50)
+        print("MARKDOWN RESULT (first 500 chars):")
+        print("=" * 50)
+        markdown = result.result.markdown.content
+        print(markdown[:500] + ("..." if len(markdown) > 500 else ""))
+
+        print("\n" + "=" * 50)
+        print("JSON RESULT:")
+        print("=" * 50)
+        import json
+        print(json.dumps(result.result.json_.content, indent=2)[:1000])
+
+    except TimeoutError as e:
+        print(f"\n✗ {e}")
+        print(f"You can check the status later with record_id: {job.record_id}")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\n✗ Error: {e}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+"""
+Basic Document Extraction Example
+
+This example demonstrates the simplest way to extract content from a document
+using the Docstrange Python SDK.
+
+Usage:
+    export DOCSTRANGE_API_KEY="your-api-key"
+    python basic_extraction.py document.pdf
+"""
+
+import sys
+from pathlib import Path
+
+from docstrange import Docstrange
+
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: python basic_extraction.py <file_path>")
+        print("Example: python basic_extraction.py invoice.pdf")
+        sys.exit(1)
+
+    file_path = Path(sys.argv[1])
+    if not file_path.exists():
+        print(f"Error: File not found: {file_path}")
+        sys.exit(1)
+
+    # Initialize the client (reads DOCSTRANGE_API_KEY from environment)
+    client = Docstrange()
+
+    print(f"Extracting content from: {file_path.name}")
+    print("-" * 50)
+
+    # Extract content as Markdown
+    with open(file_path, "rb") as f:
+        response = client.extract.sync(
+            file=f,
+            output_format="markdown",
+        )
+
+    if response.success:
+        print(f"✓ Extraction completed in {response.processing_time:.2f}s")
+        print(f"  Record ID: {response.record_id}")
+        print(f"  Pages processed: {response.pages_processed}")
+        print()
+        print("=" * 50)
+        print("EXTRACTED CONTENT:")
+        print("=" * 50)
+        print(response.result.markdown.content)
+    else:
+        print(f"✗ Extraction failed: {response.message}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+"""
+Document Classification Example
+
+This example demonstrates how to classify documents into custom categories.
+
+Usage:
+    export DOCSTRANGE_API_KEY="your-api-key"
+    python document_classification.py document.pdf
+"""
+
+import json
+import sys
+from pathlib import Path
+
+from docstrange import Docstrange
+
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: python document_classification.py <file_path>")
+        sys.exit(1)
+
+    file_path = Path(sys.argv[1])
+    if not file_path.exists():
+        print(f"Error: File not found: {file_path}")
+        sys.exit(1)
+
+    client = Docstrange()
+
+    # Define classification categories
+    categories = [
+        {
+            "name": "Invoice",
+            "description": "Bills, invoices, payment requests, and receipts"
+        },
+        {
+            "name": "Contract",
+            "description": "Legal agreements, contracts, terms of service"
+        },
+        {
+            "name": "Resume",
+            "description": "CVs, resumes, job applications"
+        },
+        {
+            "name": "Report",
+            "description": "Business reports, analysis documents, presentations"
+        },
+        {
+            "name": "Letter",
+            "description": "Business letters, correspondence, memos"
+        },
+        {
+            "name": "Form",
+            "description": "Application forms, government forms, questionnaires"
+        },
+    ]
+
+    print(f"Classifying: {file_path.name}")
+    print(f"Categories: {', '.join(c['name'] for c in categories)}")
+    print("-" * 50)
+
+    with open(file_path, "rb") as f:
+        response = client.classify.sync(
+            file=f,
+            categories=json.dumps(categories),
+        )
+
+    if response.success and response.result:
+        result = response.result
+        print(f"\n✓ Classification completed")
+        print(f"  File: {result.filename}")
+        print(f"  Total pages: {result.total_pages}")
+        print(f"  Processing time: {result.processing_time:.2f}s")
+        print()
+
+        print("PAGE-BY-PAGE CLASSIFICATION:")
+        print("=" * 50)
+
+        for page in result.pages:
+            confidence_bar = "█" * (page.confidence // 10) + "░" * (10 - page.confidence // 10)
+            print(f"\nPage {page.page_number}:")
+            print(f"  Category:   {page.category}")
+            print(f"  Confidence: [{confidence_bar}] {page.confidence}%")
+            print(f"  Reasoning:  {page.reasoning}")
+
+            if page.identified_category:
+                print(f"  Note: Model identified as '{page.identified_category}' but categorized as 'Other'")
+
+        # Summary
+        print("\n" + "=" * 50)
+        print("SUMMARY:")
+        
+        # Count pages per category
+        category_counts = {}
+        for page in result.pages:
+            cat = page.category
+            category_counts[cat] = category_counts.get(cat, 0) + 1
+
+        for cat, count in sorted(category_counts.items(), key=lambda x: -x[1]):
+            print(f"  {cat}: {count} page(s)")
+
+    else:
+        print(f"✗ Classification failed: {response.message}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()