Skip to content

Commit 0bfad6c

Browse files
authored
Merge pull request #6 from anisdismail/master
add-ocr-endpoints
2 parents 737abaf + 513cbb4 commit 0bfad6c

File tree

6 files changed

+186
-6
lines changed

6 files changed

+186
-6
lines changed

README.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ This repo is based on [Tensorflow Object Detection API](https://github.com/tenso
66
The Tensorflow version used is 1.13.1. The inference REST API works on CPU and doesn't require any GPU usage. It's supported on both Windows and Linux Operating systems.
77

88
Models trained using our training tensorflow repository can be deployed in this API. Several object detection models can be loaded and used at the same time.
9+
This repo also offers optical character recognition services to extract textboxes from images.
910

1011
This repo can be deployed using either **docker** or **docker swarm**.
1112

@@ -153,8 +154,17 @@ Returns the specified model's configuration
153154

154155
Performs inference on specified model and a list of images, and returns bounding boxes
155156

156-
**P.S: Custom endpoints like /load, /detect, and /get_labels should be used in a chronological order. First you have to call /load, and then call /detect or /get_labels**
157+
#### /models/{model_name}/one_shot_ocr (POST)
157158

159+
Takes an image and returns extracted text details. First, a detection model is used to crop interesting areas in the uploaded image. Then, these areas are passed to the OCR service for text extraction.
160+
161+
#### /models/{model_name}/ocr (POST)
162+
163+
![predict image](./docs/5.gif)
164+
165+
Takes an image and returns extracted text details without using an object detection model
166+
167+
**P.S: Custom endpoints like /load, /detect, /get_labels and /one_shot_ocr should be used in a chronological order. First you have to call /load, and then call /detect, /get_labels or /one_shot_ocr**
158168
## Model structure
159169

160170
The folder "models" contains subfolders of all the models to be loaded.
@@ -257,3 +267,5 @@ Inside each subfolder there should be a:
257267
Joe Sleiman, inmind.ai , Beirut, Lebanon
258268

259269
Antoine Charbel, inmind.ai, Beirut, Lebanon
270+
271+
[Anis Ismail](https://www.linkedin.com/in/anisdismail), Lebanese American University, Beirut, Lebanon

docker/dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ LABEL maintainer="antoine.charbel@inmind.ai"
55
COPY docker/requirements.txt .
66
COPY src/main /main
77

8+
RUN apt-get update && apt-get install -y tesseract-ocr
9+
810
RUN pip install -r requirements.txt
911

1012
WORKDIR /main

docker/requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ socketIO-client-nexus
1616
tensorflow==1.13.1
1717
uvicorn
1818
jsonschema
19-
20-
19+
pytz
20+
pytesseract
2121

2222

2323

docs/5.gif

3.94 MB
Loading

src/main/ocr.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import pytesseract
import unicodedata
import re
import numpy as np


# Module-level constants

# Coordinate order used for the "box" field of every OCR response entry:
# [left, top, right, bottom].
bounding_box_order = ["left", "top", "right", "bottom"]
12+
# Takes the detection model's bounding-box predictions and returns the text
# extracted (via tesseract) from each predicted box.
def one_shot_ocr_service(image, output):
    """Extract text from every region detected by an object-detection model.

    :param image: PIL image to crop text regions from
    :param output: detection result dict holding a 'bounding-boxes' list;
        each entry has a 'coordinates' dict with left/top/right/bottom values
    :return: list of dicts with keys 'text' (cleaned extracted text),
        'box' ([left, top, right, bottom]) and 'score' (mean tesseract
        confidence scaled to [0, 1]); regions yielding no text are omitted
    """
    response = []

    for detection in output['bounding-boxes']:
        # crop the image to the detected region
        coordinates = detection["coordinates"]
        cropped = image.crop((float(coordinates["left"]), float(coordinates["top"]),
                              float(coordinates["right"]), float(coordinates["bottom"])))

        # convert image to grayscale for better accuracy
        processed_img = cropped.convert('L')

        # keep only tokens tesseract extracted with positive confidence;
        # coerce to str because pandas may parse numeric-looking words as numbers
        df = pytesseract.image_to_data(processed_img, output_type='data.frame')
        valid_df = df[df["conf"] > 0]
        extracted_text = " ".join(valid_df["text"].astype(str).values)

        # normalize to ASCII and strip noise characters
        extracted_text = str(unicodedata.normalize('NFKD', extracted_text).encode('ascii', 'ignore').decode()).strip().replace("\n", " ").replace(
            "...", ".").replace("..", ".").replace('”', ' ').replace('“', ' ').replace("'", ' ').replace('\"', '').replace("alt/1m", "").strip()
        # raw string: same character class as before, without invalid-escape warnings
        extracted_text = re.sub(
            r'[^A-Za-z0-9.!?,;%:=()\[\]$€&/\- ]+', '', extracted_text)
        extracted_text = " ".join(extracted_text.split())

        # skip regions where no usable text was found
        # (original used `len(...) is not 0`, an identity check on an int literal)
        if extracted_text:
            prediction = {
                "text": extracted_text,
                "box": [coordinates[el] for el in bounding_box_order],
                "score": valid_df["conf"].mean() / 100.0,
            }
            response.append(prediction)

    return response
52+
# Runs tesseract over the whole image and returns the extracted text together
# with one bounding box spanning the recognized words.
def ocr_service(image):
    """Extract text from an entire image without an object-detection model.

    :param image: PIL image to run OCR on
    :return: single-element list with a dict holding 'text' (cleaned
        extracted text), 'box' ([left, top, right, bottom]) and 'score'
        (mean tesseract confidence scaled to [0, 1]); an empty list when
        tesseract finds no text at all
    """
    # convert image to grayscale for better accuracy
    processed_img = image.convert('L')

    # get data including boxes, confidences, line and page numbers;
    # keep only tokens with positive confidence
    df = pytesseract.image_to_data(processed_img, output_type='data.frame')
    valid_df = df[df["conf"] > 0]

    # nothing recognized: return an empty result instead of crashing on index[0]
    if valid_df.empty:
        return []

    # coerce to str because pandas may parse numeric-looking words as numbers
    extracted_text = " ".join(valid_df["text"].astype(str).values)
    # normalize to ASCII and strip noise characters
    extracted_text = str(unicodedata.normalize('NFKD', extracted_text).encode('ascii', 'ignore').decode()).strip().replace("\n", " ").replace(
        "...", ".").replace("..", ".").replace('”', ' ').replace('“', ' ').replace("'", ' ').replace('\"', '').replace("alt/1m", "").strip()
    # raw string: same character class as before, without invalid-escape warnings
    extracted_text = re.sub(
        r'[^A-Za-z0-9.!?,;%:=()\[\]$€&/\- ]+', '', extracted_text)
    extracted_text = " ".join(extracted_text.split())

    # bounding box from the first and last recognized word rows
    # NOTE(review): this assumes top-to-bottom/left-to-right reading order from
    # tesseract; it is an approximation, not a tight hull over all words
    index = valid_df.index.values
    coordinates = {
        "left": valid_df.loc[index[0], "left"],
        "top": valid_df.loc[index[0], "top"],
        "bottom": valid_df.loc[index[-1], "top"] + valid_df.loc[index[-1], "height"],
        "right": valid_df.loc[index[-1], "left"] + valid_df.loc[index[-1], "width"],
    }
    # .item() converts numpy scalars to plain Python ints for JSON serialization
    bounding_box = [coordinates[el].item() for el in bounding_box_order]

    response = {
        "text": extracted_text,
        "box": bounding_box,
        "score": valid_df["conf"].mean() / 100.0,
    }
    return [response]

src/main/start.py

Lines changed: 83 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,18 @@
66
from starlette.staticfiles import StaticFiles
77
from starlette.middleware.cors import CORSMiddleware
88
from deep_learning_service import DeepLearningService
9-
from fastapi import FastAPI, Form, File, UploadFile, Header
9+
from fastapi import FastAPI, Form, File, UploadFile, Header, HTTPException
1010
from inference.exceptions import ModelNotFound, InvalidModelConfiguration, ApplicationError, ModelNotLoaded, \
11-
InferenceEngineNotFound, InvalidInputData
12-
11+
InferenceEngineNotFound, InvalidInputData
12+
from ocr import ocr_service, one_shot_ocr_service
13+
from datetime import datetime
14+
import pytz
15+
from PIL import Image
1316

1417
sys.path.append('./inference')
1518

19+
tz = pytz.timezone("Europe/Berlin")
20+
1621
dl_service = DeepLearningService()
1722
error_logging = Error()
1823
app = FastAPI(version='1.0', title='BMW InnovationLab tensorflow cpu inference Automation',
@@ -185,3 +190,78 @@ async def list_model_config(model_name: str):
185190
"""
186191
config = dl_service.get_config(model_name)
187192
return ApiResponse(data=config)
193+
194+
195+
@app.post('/models/{model_name}/one_shot_ocr')
async def one_shot_ocr(
        model_name: str,
        image: UploadFile = File(
            ..., description="Image to perform optical character recognition based on layout inference:")
):
    """
    Takes an image and returns extracted text details.

    A detection model is first used to crop interesting areas in the uploaded
    image. These areas are then passed to the OCR service for text extraction.

    :param model_name: Model name or model hash for layout detection

    :param image: Image file

    :return: Text fields with the detected text inside

    """
    # run layout detection on the image with the chosen model
    try:
        output = await run_model(model_name, image)
    except Exception:
        # `except Exception` (not bare `except:`) so SystemExit/KeyboardInterrupt propagate
        raise HTTPException(status_code=404, detail='Invalid Model')

    # run OCR over each detected region
    try:
        # separate name so the UploadFile parameter is not shadowed
        pil_image = Image.open(image.file).convert('RGB')
        response = one_shot_ocr_service(pil_image, output.data)
    except Exception:
        raise HTTPException(
            status_code=500, detail='Unexpected Error during Inference (Determination of Texts)')

    if not response:
        raise HTTPException(
            status_code=400, detail='Inference (Determination of Texts) is not Possible with the Specified Model')

    return response
235+
236+
@app.post('/models/{model_name}/ocr')
async def optical_character_recognition(
        model_name: str,
        image: UploadFile = File(
            ..., description="Image to perform optical character recognition based on layout inference:"),
):
    """
    Takes an image and returns extracted text information.

    The image is passed directly to the OCR service for text extraction,
    without running an object-detection model first.

    :param model_name: Model name or model hash

    :param image: Image file

    :return: Text fields with the detected text inside

    """
    # run the OCR service on the uploaded image
    try:
        # separate name so the UploadFile parameter is not shadowed
        pil_image = Image.open(image.file).convert('RGB')
        response = ocr_service(pil_image)
    except Exception:
        # `except Exception` (not bare `except:`) so SystemExit/KeyboardInterrupt propagate
        raise HTTPException(
            status_code=500, detail='Unexpected Error during Inference (Determination of Texts)')

    if not response:
        raise HTTPException(
            status_code=400, detail='Inference (Determination of Texts) is not Possible with the Specified Model')

    return response

0 commit comments

Comments
 (0)