Created
August 27, 2018 12:59
-
-
Save amitu/2f4bda3ae4916daac86fcb2cce71471a to your computer and use it in GitHub Desktop.
OCR using google
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Copyright 2017 Google Inc. All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
"""Outlines document text given an image. | |
Example: | |
python doctext.py resources/text_menu.jpg | |
""" | |
""" | |
$ export GOOGLE_APPLICATION_CREDENTIALS=tmp/Karmator-05dd6a510e87.json | |
$ python src/scripts/users/amitu/ocr.py tmp/p.png | |
""" | |
# [START full_tutorial] | |
# [START imports] | |
import argparse | |
from enum import Enum | |
import io | |
from google.cloud import vision | |
from google.cloud.vision import types | |
from PIL import Image, ImageDraw | |
# [END imports] | |
class FeatureType(Enum):
    """Levels of the detected-text hierarchy whose bounds can be requested.

    Members are numbered 1 through 5, from the outermost level (PAGE) down
    to the innermost one (SYMBOL).
    """

    PAGE = 1    # a whole page of the document
    BLOCK = 2   # a block within a page
    PARA = 3    # a paragraph within a block
    WORD = 4    # a word within a paragraph
    SYMBOL = 5  # a single symbol within a word
def draw_boxes(image, bounds, color):
    """Outline every bounding polygon in ``bounds`` on ``image``.

    Args:
        image: Image to draw on; the outlines are drawn onto this object.
        bounds: Iterable of objects exposing ``vertices``; the first four
            vertices (each with ``x`` and ``y``) are used as the corners.
        color: Outline color passed through to ``ImageDraw``.

    Returns:
        The same ``image`` object that was passed in.
    """
    # [START draw_blocks]
    canvas = ImageDraw.Draw(image)
    for box in bounds:
        # Flatten the four corners into [x0, y0, x1, y1, x2, y2, x3, y3].
        corners = []
        for index in range(4):
            vertex = box.vertices[index]
            corners.extend((vertex.x, vertex.y))
        # Arguments after the coordinates are fill (None: leave the polygon
        # unfilled) and outline.
        canvas.polygon(corners, None, color)
    return image
# [END draw_blocks]
def get_document_bounds(image_file, feature):
    # [START detect_bounds]
    """Return the bounding boxes of one feature level detected in an image.

    The image is sent to the Vision API for document text detection and the
    resulting page/block/paragraph/word/symbol hierarchy is walked. As a side
    effect, the text of every detected word is printed to stdout.

    Args:
        image_file: Path of the image file to analyse.
        feature: The ``FeatureType`` member whose bounding boxes to collect.

    Returns:
        A list of the ``bounding_box`` objects for the requested level. For
        ``FeatureType.PAGE`` the box of the last block on each page is used
        (pages without blocks contribute nothing).

    Raises:
        RuntimeError: If the API response carries an error message.
    """
    client = vision.ImageAnnotatorClient()

    # Keep the handle under its own name so the path argument is not rebound.
    with io.open(image_file, 'rb') as image_handle:
        content = image_handle.read()

    image = types.Image(content=content)
    response = client.document_text_detection(image=image)
    # Fail loudly instead of silently returning no bounds on an API error.
    if response.error.message:
        raise RuntimeError(
            'Vision API error: {}'.format(response.error.message))
    document = response.full_text_annotation

    bounds = []
    # Collect specified feature bounds by enumerating all document features
    for page in document.pages:
        # Tracked per page so the PAGE case never reads an unbound loop
        # variable or a block left over from a previous page.
        last_block = None
        for block in page.blocks:
            last_block = block
            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    print("".join(s.text for s in word.symbols))
                    if feature is FeatureType.SYMBOL:
                        bounds.extend(
                            symbol.bounding_box for symbol in word.symbols)
                    if feature is FeatureType.WORD:
                        bounds.append(word.bounding_box)
                if feature is FeatureType.PARA:
                    bounds.append(paragraph.bounding_box)
            if feature is FeatureType.BLOCK:
                bounds.append(block.bounding_box)
        # NOTE(review): no page-level box is read here; the box of the page's
        # last block is kept as the established PAGE result — confirm whether
        # callers would rather request BLOCK.
        if feature is FeatureType.PAGE and last_block is not None:
            bounds.append(last_block.bounding_box)

    # The list `bounds` contains the coordinates of the bounding boxes.
    # [END detect_bounds]
    return bounds
def render_doc_text(filein, fileout):
    """Outline detected text regions on an image, then save or show it.

    PAGE bounds are drawn in blue, paragraphs in red and words in yellow.

    Args:
        filein: Path of the image to analyse and annotate.
        fileout: Destination for the annotated image. A falsy value (the
            command-line default is ``0``) means "do not save": the image is
            displayed instead.
    """
    # [START render_doc_text]
    image = Image.open(filein)

    # Drawn in this order so that finer-grained outlines land on top.
    # NOTE: every get_document_bounds call runs its own detection request.
    layers = (
        (FeatureType.PAGE, 'blue'),
        (FeatureType.PARA, 'red'),
        (FeatureType.WORD, 'yellow'),
    )
    for feature, color in layers:
        bounds = get_document_bounds(filein, feature)
        draw_boxes(image, bounds, color)

    # Truthiness test instead of `is not 0`: identity comparison against an
    # int literal is implementation-dependent and warns on newer Pythons.
    if fileout:
        image.save(fileout)
    else:
        image.show()
    # [END render_doc_text]
if __name__ == '__main__':
    # Command-line entry point: annotate one image and save or show it.
    # [START run_doc_text]
    parser = argparse.ArgumentParser()
    parser.add_argument('detect_file', help='The image for text detection.')
    # 0 is the "no output file" sentinel that render_doc_text checks for.
    parser.add_argument('-out_file', help='Optional output file', default=0)
    args = parser.parse_args()

    render_doc_text(args.detect_file, args.out_file)
    # [END run_doc_text]
# [END full_tutorial] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment