Skip to content

Instantly share code, notes, and snippets.

@amitu
Created August 27, 2018 12:59
Show Gist options
  • Save amitu/2f4bda3ae4916daac86fcb2cce71471a to your computer and use it in GitHub Desktop.
Save amitu/2f4bda3ae4916daac86fcb2cce71471a to your computer and use it in GitHub Desktop.
OCR using Google
#!/usr/bin/env python
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Outlines document text given an image.
Example:
python doctext.py resources/text_menu.jpg
"""
"""
$ export GOOGLE_APPLICATION_CREDENTIALS=tmp/Karmator-05dd6a510e87.json
$ python src/scripts/users/amitu/ocr.py tmp/p.png
"""
# [START full_tutorial]
# [START imports]
import argparse
from enum import Enum
import io
from google.cloud import vision
from google.cloud.vision import types
from PIL import Image, ImageDraw
# [END imports]
class FeatureType(Enum):
    """Levels of the detected-text hierarchy whose bounds can be collected.

    Members run from the outermost level (PAGE) to the innermost (SYMBOL).
    """

    PAGE = 1
    BLOCK = 2
    PARA = 3  # paragraph
    WORD = 4
    SYMBOL = 5
def draw_boxes(image, bounds, color):
    """Outline every bounding polygon in `bounds` on `image`.

    Args:
        image: Image handed to ``ImageDraw.Draw``; the outlines are drawn
            onto it.
        bounds: Iterable of objects exposing ``vertices``, where each vertex
            has ``x`` and ``y`` attributes. Exactly the first four vertices
            of each entry are used.
        color: Passed to ``polygon`` as the outline colour; the fill
            argument is ``None``.

    Returns:
        The same ``image`` object that was passed in.
    """
    # [START draw_blocks]
    canvas = ImageDraw.Draw(image)
    for box in bounds:
        # Flatten the four corners into [x0, y0, x1, y1, x2, y2, x3, y3].
        outline = []
        for corner in range(4):
            vertex = box.vertices[corner]
            outline.extend((vertex.x, vertex.y))
        canvas.polygon(outline, None, color)
    return image
    # [END draw_blocks]
def get_document_bounds(image_file, feature):
    """Return the bounding boxes of every `feature` detected in an image.

    Reads the image, runs document text detection on it, prints each
    detected word on its own line, and collects the bounding box of every
    element at the requested level.

    Args:
        image_file: Path of the image file to analyse.
        feature: ``FeatureType`` member selecting which level to collect.

    Returns:
        A list of bounding boxes in document order. For
        ``FeatureType.PAGE`` the list holds the bounding box of the last
        block of each page that contains at least one block.
    """
    # [START detect_bounds]
    client = vision.ImageAnnotatorClient()
    bounds = []
    # Use a separate name for the handle so the path argument is not rebound.
    with io.open(image_file, 'rb') as image_handle:
        content = image_handle.read()
    image = types.Image(content=content)
    response = client.document_text_detection(image=image)
    document = response.full_text_annotation
    # Collect specified feature bounds by enumerating all document features
    for page in document.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    print("".join(s.text for s in word.symbols))
                    if feature == FeatureType.SYMBOL:
                        bounds.extend(
                            symbol.bounding_box for symbol in word.symbols)
                    if feature == FeatureType.WORD:
                        bounds.append(word.bounding_box)
                if feature == FeatureType.PARA:
                    bounds.append(paragraph.bounding_box)
            if feature == FeatureType.BLOCK:
                bounds.append(block.bounding_box)
        # The PAGE case has always used a block's bounding box. Read the
        # page's last block explicitly instead of relying on the leaked
        # loop variable, which raised NameError for a leading page without
        # blocks and repeated a stale block for later empty pages.
        if feature == FeatureType.PAGE and page.blocks:
            bounds.append(page.blocks[-1].bounding_box)
    # The list `bounds` contains the coordinates of the bounding boxes.
    # [END detect_bounds]
    return bounds
def render_doc_text(filein, fileout):
    """Outline pages, paragraphs and words on an image, then save or show it.

    Args:
        filein: Path of the image to annotate.
        fileout: Destination passed to ``image.save``. Any falsy value
            (such as the command-line default ``0``) displays the image
            with ``image.show`` instead of saving it.
    """
    # [START render_doc_text]
    # Drawn in this order, so later outlines are painted over earlier ones.
    outline_colors = (
        (FeatureType.PAGE, 'blue'),
        (FeatureType.PARA, 'red'),
        (FeatureType.WORD, 'yellow'),
    )
    # The context manager closes the underlying image file when done.
    with Image.open(filein) as image:
        for feature, color in outline_colors:
            # NOTE: each call re-runs text detection on `filein`.
            bounds = get_document_bounds(filein, feature)
            draw_boxes(image, bounds, color)
        # Test by truthiness: `is not 0` compared object identity with an
        # int literal, which is implementation-dependent.
        if fileout:
            image.save(fileout)
        else:
            image.show()
    # [END render_doc_text]
if __name__ == '__main__':
    # Command-line entry point: annotate `detect_file`, then save the result
    # to `-out_file` or display it when no output file is given.
    # [START run_doc_text]
    parser = argparse.ArgumentParser()
    parser.add_argument('detect_file', help='The image for text detection.')
    # The default stays 0: render_doc_text treats it as "no output file".
    parser.add_argument('-out_file', help='Optional output file', default=0)
    args = parser.parse_args()
    render_doc_text(args.detect_file, args.out_file)
    # [END run_doc_text]
# [END full_tutorial]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment