-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbbocrv2.py
192 lines (161 loc) · 7.29 KB
/
bbocrv2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#-*- coding: utf-8 -*-
from __future__ import print_function
#---------------------------------------------------------------
# imports
#---------------------------------------------------------------
import streamlit as st
st.set_page_config(layout="wide")
import base64
from PIL import Image
import numpy as np
import requests
import pandas as pd
import cv2
from bbocrv2.ocr import ImageOCR
from bbocrv2.visualize import draw_word_polys,draw_document_layout
from bbocrv2.postprocessing import process_segments_and_words,construct_text_from_segments
from bbocrv2.htmlgen import generate_html
from apsisocr.utils import correctPadding
#--------------------------------------------------
# main
#--------------------------------------------------
@st.cache_resource
def load_model():
    """Create the ImageOCR pipeline object.

    The function is wrapped in ``st.cache_resource``.

    Returns:
        The freshly constructed ``ImageOCR`` instance.
    """
    return ImageOCR()


# Module-level OCR handle; main() calls it on the uploaded image.
ocr = load_model()
def get_data_url(img_path):
    """Return the base64 text encoding of the file at ``img_path``.

    Despite the name, only the base64 payload is returned; the caller is
    responsible for prepending the ``data:<mime>;base64,`` prefix.

    Args:
        img_path: Path of the file to read (opened in binary mode).

    Returns:
        str: The file contents, base64-encoded and decoded as UTF-8 text.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises,
    # which the manual open()/close() pair did not guarantee.
    with open(img_path, "rb") as file_:
        contents = file_.read()
    return base64.b64encode(contents).decode("utf-8")
# Markdown text (heading, one-sentence intro and a Step/Description table)
# summarising the OCR and layout-analysis pipeline. main() passes it to
# st.markdown inside the sidebar.
# NOTE(review): the leading icon characters of the table rows look mis-encoded
# in this copy of the file -- confirm they render as the intended emoji.
flowchart= """
### OCR & Document Layout Analysis System Flow
The following sequence represents the flow of the OCR and Document Layout Analysis system in a circular process.
| **Step** | **Description** |
|----------------------|-----------------|
| π§βπ» **User** | Provides the input image (file path or numpy array) |
| π» **System** | Reads and converts the image to RGB |
| π **PaddleDBNet** | Detects word regions in the image |
| π **RotationCorrection** | Applies automated rotation correction to the image |
| π’ **DBScan** | Applies reading order detection for words and text |
| π§ **APSISNet** | Performs text recognition on correctly rotated word boxes |
| π **YOLOv8** | Performs document layout segmentation on the image |
| π **Merging** | Merges document segments using vectorized IoU calculation |
| ποΈ **LayoutHandling** | Checks intersection of merged segments with detected words |
| π **HTMLReconstruction** | Generates the final HTML layout for the image |
| π **System** | Returns the final output (HTML with words and document layout) to the user |
"""
# Markdown table of team members (name, department, registration number),
# framed by horizontal rules. main() renders it in the sidebar with st.markdown.
team="""
---
# Team Members
| Name | Department | Registration Number |
|--------------------------|---------------------|---------------------|
| **Shattik Bandyopadhyaa** | Software Engineering| 2019831039 |
| **Anabil Debnath** | Software Engineering| 2019831071 |
---
"""
# Markdown table listing, for each pipeline task, the model used and the
# module that provides it. main() renders it in the sidebar under the
# "Module and Model List" heading.
module="""
| Task | Model | Module |
|-----------|-----------|-----------|
| Text Detection | Differential Binarizer (Word)| PaddleOCR |
| Text Recognition| ApsisNet (Bangla)|ApsisOCR |
| Document Layout Analysis| Yolov8-DLA (DLSprint-BadLad)| BBOCR |
| Reading Order detection | DBScan | BBOCRv2|
| HTML Reconstruction | Custom | BBOCRv2|
"""
def _render_sidebar():
    """Fill the sidebar with the pipeline overview, team, model list and partner logos."""
    with st.sidebar:
        # Pipeline overview: a markdown table, not a rendered diagram.
        st.markdown(flowchart, unsafe_allow_html=True)
        # Team members table
        st.markdown(team)
        # Task / model / module table
        st.markdown("# **Module and Model List**")
        st.markdown(module)
        st.markdown("---")
        st.markdown("## **Industry Partner**")
        # Both logos are .png files, so the data URL declares image/png.
        st.markdown(
            f'<img src="data:image/png;base64,{get_data_url("resources/apsis.png")}" alt="apsis">'
            + ' [apsis solutions limited](https://apsissolutions.com/)',
            unsafe_allow_html=True,
        )
        st.markdown("## **Research Collaboration**")
        st.markdown(
            f'<img src="data:image/png;base64,{get_data_url("resources/bengaliai.png")}" alt="bengali.ai">'
            + ' [bengali.ai](https://bengali.ai/)',
            unsafe_allow_html=True,
        )
        st.markdown("---")


def _render_word_gallery(rotated_image, words, per_row=10):
    """Show each detected word crop with its recognised text as caption, ``per_row`` per row."""
    polys = [entry["poly"] for entry in words]
    crops = ocr.detector.get_crops(rotated_image, polys)
    # correctPadding's result is unpacked as (padded_crop, pad_w); only the
    # first pad_w columns of each padded crop are kept for display.
    padded = [correctPadding(crop, (128, 1024)) for crop in crops]
    crops = [crop[:, :pad_w] for (crop, pad_w) in padded]
    data = [{"image": crop, "text": entry["text"]} for crop, entry in zip(crops, words)]

    # Custom CSS intended to center the gallery
    st.markdown(
        """
<style>
.centered-table {
display: flex;
justify-content: center;
}
</style>
""",
        unsafe_allow_html=True,
    )
    # NOTE(review): the opening and closing <div> are emitted by separate
    # st.markdown calls; confirm the wrapper actually encloses the columns
    # rendered in between.
    st.markdown('<div class="centered-table">', unsafe_allow_html=True)
    # Lay the crops out in rows of `per_row` columns; the last row may be
    # shorter, and zip() simply stops when the data runs out.
    for start in range(0, len(data), per_row):
        row_cols = st.columns(per_row)
        for column, item in zip(row_cols, data[start:start + per_row]):
            with column:
                st.image(item["image"], caption=item["text"], use_container_width=True)
    st.markdown('</div>', unsafe_allow_html=True)


def main():
    """Render the OCR demo page.

    Draws the title, the sidebar and the upload widget. Once an image is
    uploaded it runs the module-level ``ocr`` object on it and shows, in order:
    the input image, the rotated image, the word-detection and document-layout
    visualisations, the recognised words with their line/word numbers, the
    layout-wise text, the reconstructed HTML and a gallery of word crops.
    """
    # NOTE(review): the title text looks mis-encoded in this copy of the
    # file -- confirm it renders as the intended Bangla title.
    st.title("ΰ¦ΰ¦Ώΰ¦€ΰ§ΰ¦°ΰ¦²ΰ¦Ώΰ¦ͺΰ¦Ώ")
    st.markdown(" ### Improved Bangla text word detection,recognition ,layout analysis , reading order and HTML Reconstruction")
    _render_sidebar()

    # For newline
    st.write("\n")
    # File selection
    st.title("Document selection")
    uploaded_file = st.file_uploader("Upload files", type=["png", "jpeg", "jpg"])
    # For newline
    st.write("\n")
    # Instructions
    st.markdown("*click on the top-right corner of an image to enlarge it!*")

    # Five equally weighted result columns, each with its own heading
    cols = st.columns((1, 1, 1, 1, 1))
    headings = (
        "Input Image",
        "Processed Image",
        "Word Detection",
        "Document Layout",
        "Text and Reading Order",
    )
    for column, heading in zip(cols, headings):
        column.subheader(heading)

    if uploaded_file is None:
        return

    image = Image.open(uploaded_file).convert("RGB")
    arr = np.array(image)
    cols[0].image(arr)

    with st.spinner('Executing OCR'):
        output = ocr(arr)

    rotated_image = output["rotation"]["rotated_image"]
    words = output["words"]
    segments = output["segments"]

    cols[1].image(rotated_image)
    # word-detection
    word_det_viz = draw_word_polys(rotated_image, [entry["poly"] for entry in words])
    cols[2].image(word_det_viz)
    # layout
    layout_viz = draw_document_layout(rotated_image, segments)
    cols[3].image(layout_viz)
    # recognition and reading order: passing `columns` keeps only these three
    # fields and still yields a valid (empty) frame when no words were
    # detected, where selecting them afterwards would raise KeyError.
    df = pd.DataFrame(words, columns=['text', 'line_num', 'word_num'])
    cols[4].dataframe(df)

    # text construction
    st.title("Layout wise text construction")
    segmented_data = process_segments_and_words(segments, words)
    layout_text_data = construct_text_from_segments(segmented_data)
    st.text_area("layout text", value=layout_text_data, height=400)

    # HTML reconstruction, sized from the input array's height and width
    st.title("HTML Reconstruction")
    height, width = arr.shape[:2]
    html_data = generate_html(segmented_data, height, width, image)
    st.components.v1.html(html_data, height=600, scrolling=True)

    # Word Analysis
    st.title("Word Analysis")
    _render_word_gallery(rotated_image, words)
# Script entry point: build the page only when this file is run directly.
if __name__ == "__main__":
    main()