Spaces:
Build error
Build error
Commit
·
fc3376f
1
Parent(s):
545acf0
Refactor app.py to remove flash-attn installation; enhance Gradio interface with styled HTML header and detailed model information section.
Browse files
app.py
CHANGED
|
@@ -4,9 +4,6 @@ from transformers import AutoTokenizer, AutoProcessor, AutoModelForImageTextToTe
|
|
| 4 |
import torch
|
| 5 |
import spaces
|
| 6 |
|
| 7 |
-
import subprocess
|
| 8 |
-
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 9 |
-
|
| 10 |
model_path = "nanonets/Nanonets-OCR-s"
|
| 11 |
|
| 12 |
# Load model once at startup
|
|
@@ -60,8 +57,15 @@ def ocr_image_gradio(image, max_tokens=4096):
|
|
| 60 |
|
| 61 |
# Create Gradio interface
|
| 62 |
with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
|
| 63 |
-
|
| 64 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
with gr.Row():
|
| 67 |
with gr.Column(scale=1):
|
|
@@ -84,7 +88,6 @@ with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
|
|
| 84 |
output_text = gr.Textbox(
|
| 85 |
label="Extracted Text",
|
| 86 |
lines=20,
|
| 87 |
-
max_lines=30,
|
| 88 |
show_copy_button=True,
|
| 89 |
placeholder="Extracted text will appear here..."
|
| 90 |
)
|
|
@@ -103,6 +106,48 @@ with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
|
|
| 103 |
outputs=output_text,
|
| 104 |
show_progress=True
|
| 105 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
if __name__ == "__main__":
|
| 108 |
demo.queue().launch()
|
|
|
|
| 4 |
import torch
|
| 5 |
import spaces
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
model_path = "nanonets/Nanonets-OCR-s"
|
| 8 |
|
| 9 |
# Load model once at startup
|
|
|
|
| 57 |
|
| 58 |
# Create Gradio interface
|
| 59 |
with gr.Blocks(title="Nanonets OCR Demo", theme=gr.themes.Soft()) as demo:
|
| 60 |
+
# Replace simple markdown with styled HTML header
|
| 61 |
+
gr.HTML("""
|
| 62 |
+
<div class="title" style="text-align: center">
|
| 63 |
+
<h1>🔍 Nanonets OCR - Document Text Extraction</h1>
|
| 64 |
+
<p style="font-size: 1.1em; color: #6b7280; margin-bottom: 0.6em;">
|
| 65 |
+
A state-of-the-art image-to-markdown OCR model for intelligent document processing
|
| 66 |
+
</p>
|
| 67 |
+
</div>
|
| 68 |
+
""")
|
| 69 |
|
| 70 |
with gr.Row():
|
| 71 |
with gr.Column(scale=1):
|
|
|
|
| 88 |
output_text = gr.Textbox(
|
| 89 |
label="Extracted Text",
|
| 90 |
lines=20,
|
|
|
|
| 91 |
show_copy_button=True,
|
| 92 |
placeholder="Extracted text will appear here..."
|
| 93 |
)
|
|
|
|
| 106 |
outputs=output_text,
|
| 107 |
show_progress=True
|
| 108 |
)
|
| 109 |
+
|
| 110 |
+
# Add model information section
|
| 111 |
+
with gr.Accordion("About Nanonets-OCR-s", open=False):
|
| 112 |
+
gr.Markdown("""
|
| 113 |
+
## Nanonets-OCR-s
|
| 114 |
+
|
| 115 |
+
Nanonets-OCR-s is a powerful, state-of-the-art image-to-markdown OCR model that goes far beyond traditional text extraction.
|
| 116 |
+
It transforms documents into structured markdown with intelligent content recognition and semantic tagging, making it ideal
|
| 117 |
+
for downstream processing by Large Language Models (LLMs).
|
| 118 |
+
|
| 119 |
+
### Key Features
|
| 120 |
+
|
| 121 |
+
- **LaTeX Equation Recognition**: Automatically converts mathematical equations and formulas into properly formatted LaTeX syntax.
|
| 122 |
+
It distinguishes between inline ($...$) and display ($$...$$) equations.
|
| 123 |
+
|
| 124 |
+
- **Intelligent Image Description**: Describes images within documents using structured `<img>` tags, making them digestible
|
| 125 |
+
for LLM processing. It can describe various image types, including logos, charts, graphs and so on, detailing their content,
|
| 126 |
+
style, and context.
|
| 127 |
+
|
| 128 |
+
- **Signature Detection & Isolation**: Identifies and isolates signatures from other text, outputting them within a `<signature>` tag.
|
| 129 |
+
This is crucial for processing legal and business documents.
|
| 130 |
+
|
| 131 |
+
- **Watermark Extraction**: Detects and extracts watermark text from documents, placing it within a `<watermark>` tag.
|
| 132 |
+
|
| 133 |
+
- **Smart Checkbox Handling**: Converts form checkboxes and radio buttons into standardized Unicode symbols (☐, ☑, ☒)
|
| 134 |
+
for consistent and reliable processing.
|
| 135 |
+
|
| 136 |
+
- **Complex Table Extraction**: Accurately extracts complex tables from documents and converts them into both markdown
|
| 137 |
+
and HTML table formats.
|
| 138 |
+
""")
|
| 139 |
+
|
| 140 |
+
# Add links section at the bottom
|
| 141 |
+
gr.Markdown("""
|
| 142 |
+
---
|
| 143 |
+
### Resources
|
| 144 |
+
|
| 145 |
+
- [📚 Hugging Face Model](https://huggingface.co/nanonets/Nanonets-OCR-s)
|
| 146 |
+
- [📝 Release Blog](https://nanonets.com/research/nanonets-ocr-s/)
|
| 147 |
+
- [💻 GitHub Repository](https://github.com/NanoNets/docext)
|
| 148 |
+
|
| 149 |
+
---
|
| 150 |
+
""")
|
| 151 |
|
| 152 |
if __name__ == "__main__":
|
| 153 |
demo.queue().launch()
|