Image analysis lab
from dotenv import load_dotenv
import os
from array import array
from PIL import Image, ImageDraw
import sys
import time
from matplotlib import pyplot as plt
import numpy as np
# import namespaces
import azure.ai.vision as sdk
def main():
    global cv_client

    try:
        # Get Configuration Settings
        load_dotenv()
        ai_endpoint = os.getenv('AI_SERVICE_ENDPOINT')
        ai_key = os.getenv('AI_SERVICE_KEY')

        # Get image
        image_file = 'images/street.jpg'
        if len(sys.argv) > 1:
            image_file = sys.argv[1]

        # Authenticate Azure AI Vision client
        cv_client = sdk.VisionServiceOptions(ai_endpoint, ai_key)

        # Analyze image
        AnalyzeImage(image_file, cv_client)

        # Remove the background or generate a foreground matte
        BackgroundForeground(image_file, cv_client)

    except Exception as ex:
        print(ex)
def AnalyzeImage(image_file, cv_client):
    print('\nAnalyzing', image_file)

    # Specify features to be retrieved
    analysis_options = sdk.ImageAnalysisOptions()

    features = analysis_options.features = (
        sdk.ImageAnalysisFeature.CAPTION |
        sdk.ImageAnalysisFeature.DENSE_CAPTIONS |
        sdk.ImageAnalysisFeature.TAGS |
        sdk.ImageAnalysisFeature.OBJECTS |
        sdk.ImageAnalysisFeature.PEOPLE
    )

    # Get image analysis
    image = sdk.VisionSource(image_file)
    image_analyzer = sdk.ImageAnalyzer(cv_client, image, analysis_options)
    result = image_analyzer.analyze()

    if result.reason == sdk.ImageAnalysisResultReason.ANALYZED:
        # Get image captions
        if result.caption is not None:
            print("\nCaption:")
            print(" Caption: '{}' (confidence: {:.2f}%)".format(result.caption.content, result.caption.confidence * 100))

        # Get image dense captions
        if result.dense_captions is not None:
            print("\nDense Captions:")
            for caption in result.dense_captions:
                print(" Caption: '{}' (confidence: {:.2f}%)".format(caption.content, caption.confidence * 100))

        # Get image tags
        if result.tags is not None:
            print("\nTags:")
            for tag in result.tags:
                print(" Tag: '{}' (confidence: {:.2f}%)".format(tag.name, tag.confidence * 100))

        # Get objects in the image
        if result.objects is not None:
            print("\nObjects in image:")

            # Prepare image for drawing
            image = Image.open(image_file)
            fig = plt.figure(figsize=(image.width/100, image.height/100))
            plt.axis('off')
            draw = ImageDraw.Draw(image)
            color = 'cyan'

            for detected_object in result.objects:
                # Print object name
                print(" {} (confidence: {:.2f}%)".format(detected_object.name, detected_object.confidence * 100))

                # Draw object bounding box
                r = detected_object.bounding_box
                bounding_box = ((r.x, r.y), (r.x + r.w, r.y + r.h))
                draw.rectangle(bounding_box, outline=color, width=3)
                plt.annotate(detected_object.name, (r.x, r.y), backgroundcolor=color)

            # Save annotated image
            plt.imshow(image)
            plt.tight_layout(pad=0)
            outputfile = 'objects.jpg'
            fig.savefig(outputfile)
            print(' Results saved in', outputfile)

        # Get people in the image
        if result.people is not None:
            print("\nPeople in image:")

            # Prepare image for drawing
            image = Image.open(image_file)
            fig = plt.figure(figsize=(image.width/100, image.height/100))
            plt.axis('off')
            draw = ImageDraw.Draw(image)
            color = 'cyan'

            for detected_people in result.people:
                # Draw object bounding box
                r = detected_people.bounding_box
                bounding_box = ((r.x, r.y), (r.x + r.w, r.y + r.h))
                draw.rectangle(bounding_box, outline=color, width=3)

                # Return the confidence of the person detected
                #print(" {} (confidence: {:.2f}%)".format(detected_people.bounding_box, detected_people.confidence * 100))

            # Save annotated image
            plt.imshow(image)
            plt.tight_layout(pad=0)
            outputfile = 'people.jpg'
            fig.savefig(outputfile)
            print(' Results saved in', outputfile)

    else:
        error_details = sdk.ImageAnalysisErrorDetails.from_result(result)
        print(" Analysis failed.")
        print(" Error reason: {}".format(error_details.reason))
        print(" Error code: {}".format(error_details.error_code))
        print(" Error message: {}".format(error_details.message))
def BackgroundForeground(image_file, cv_client):
    # Remove the background from the image or generate a foreground matte
    print('\nRemove the background from the image or generate a foreground matte')

    image = sdk.VisionSource(image_file)
    analysis_options = sdk.ImageAnalysisOptions()

    # Set the image analysis segmentation mode to background or foreground
    analysis_options.segmentation_mode = sdk.ImageSegmentationMode.BACKGROUND_REMOVAL

    image_analyzer = sdk.ImageAnalyzer(cv_client, image, analysis_options)
    result = image_analyzer.analyze()

    if result.reason == sdk.ImageAnalysisResultReason.ANALYZED:
        image_buffer = result.segmentation_result.image_buffer
        print(" Segmentation result:")
        print(" Output image buffer size (bytes) = {}".format(len(image_buffer)))
        print(" Output image height = {}".format(result.segmentation_result.image_height))
        print(" Output image width = {}".format(result.segmentation_result.image_width))

        output_image_file = "newimage.jpg"
        with open(output_image_file, 'wb') as binary_file:
            binary_file.write(image_buffer)
        print(" File {} written to disk".format(output_image_file))

    else:
        error_details = sdk.ImageAnalysisErrorDetails.from_result(result)
        print(" Analysis failed.")
        print(" Error reason: {}".format(error_details.reason))
        print(" Error code: {}".format(error_details.error_code))
        print(" Error message: {}".format(error_details.message))
        print(" Did you set the computer vision endpoint and key?")
if __name__ == "__main__":
    main()
from dotenv import load_dotenv
import os
# Import namespaces
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient
def main():
    try:
        # Get Configuration Settings
        load_dotenv()
        ai_endpoint = os.getenv('AI_SERVICE_ENDPOINT')
        ai_key = os.getenv('AI_SERVICE_KEY')

        # Create client using endpoint and key
        credential = AzureKeyCredential(ai_key)
        ai_client = TextAnalyticsClient(endpoint=ai_endpoint, credential=credential)

        # Analyze each text file in the reviews folder
        reviews_folder = 'reviews'
        for file_name in os.listdir(reviews_folder):
            # Read the file contents
            print('\n-------------\n' + file_name)
            text = open(os.path.join(reviews_folder, file_name), encoding='utf8').read()
            print('\n' + text)

            # Get language
            detectedLanguage = ai_client.detect_language(documents=[text])[0]
            print('\nLanguage: {}'.format(detectedLanguage.primary_language.name))

            # Get sentiment
            sentimentAnalysis = ai_client.analyze_sentiment(documents=[text])[0]
            print("\nSentiment: {}".format(sentimentAnalysis.sentiment))

            # Get key phrases
            phrases = ai_client.extract_key_phrases(documents=[text])[0].key_phrases
            if len(phrases) > 0:
                print("\nKey Phrases:")
                for phrase in phrases:
                    print('\t{}'.format(phrase))

            # Get entities
            entities = ai_client.recognize_entities(documents=[text])[0].entities
            if len(entities) > 0:
                print("\nEntities")
                for entity in entities:
                    print('\t{} ({})'.format(entity.text, entity.category))

            # Get linked entities
            entities = ai_client.recognize_linked_entities(documents=[text])[0].entities
            if len(entities) > 0:
                print("\nLinks")
                for linked_entity in entities:
                    print('\t{} ({})'.format(linked_entity.name, linked_entity.url))

    except Exception as ex:
        print(ex)
if __name__ == "__main__":
    main()
import os
from dotenv import load_dotenv
# Add Azure OpenAI package
from openai import AzureOpenAI
def main():
    try:
        # Get configuration settings
        load_dotenv()
        azure_oai_endpoint = os.getenv("AZURE_OAI_ENDPOINT")
        azure_oai_key = os.getenv("AZURE_OAI_KEY")
        azure_oai_model = os.getenv("AZURE_OAI_MODEL")

        # Read text from file
        text = open(file="../text-files/sample-text.txt", encoding="utf8").read()
        print("\nSending request for summary to Azure OpenAI endpoint...\n\n")

        client = AzureOpenAI(
            azure_endpoint=azure_oai_endpoint,
            api_key=azure_oai_key,
            api_version="2023-05-15"
        )

        # Send request to Azure OpenAI model
        response = client.chat.completions.create(
            model=azure_oai_model,
            temperature=0.7,
            max_tokens=120,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Summarize the following text in 20 words or less:\n" + text}
            ]
        )

        print("Summary: " + response.choices[0].message.content + "\n")

    except Exception as ex:
        print(ex)
if __name__ == '__main__':
    main()
pip install azure-ai-vision==0.15.1b1
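The azure-ai-vision pin above covers only the Vision SDK. Judging from the import statements in these scripts, the remaining dependencies would likely be installed with something like the following (package names inferred from the imports, not taken from the original lab setup files):
pip install python-dotenv pillow matplotlib numpy
pip install azure-ai-textanalytics openai azure-cognitiveservices-speech playsound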
import os
from dotenv import load_dotenv
# Add Azure OpenAI package
from openai import AzureOpenAI
# Set to True to print the full response from OpenAI for each call
printFullResponse = False
def main():
    try:
        # Get configuration settings
        load_dotenv()
        azure_oai_endpoint = os.getenv("AZURE_OAI_ENDPOINT")
        azure_oai_key = os.getenv("AZURE_OAI_KEY")
        azure_oai_model = os.getenv("AZURE_OAI_MODEL")

        # Configure the Azure OpenAI client
        client = AzureOpenAI(
            azure_endpoint=azure_oai_endpoint,
            api_key=azure_oai_key,
            api_version="2023-05-15"
        )

        while True:
            print('1: Basic prompt (no prompt engineering)\n' +
                  '2: Prompt with email formatting and basic system message\n' +
                  '3: Prompt with formatting and specifying content\n' +
                  '4: Prompt adjusting system message to be light and use jokes\n' +
                  '\'quit\' to exit the program\n')
            command = input('Enter a number:')
            if command == '1':
                call_openai_model(messages="../prompts/basic.txt", model=azure_oai_model, client=client)
            elif command == '2':
                call_openai_model(messages="../prompts/email-format.txt", model=azure_oai_model, client=client)
            elif command == '3':
                call_openai_model(messages="../prompts/specify-content.txt", model=azure_oai_model, client=client)
            elif command == '4':
                call_openai_model(messages="../prompts/specify-tone.txt", model=azure_oai_model, client=client)
            elif command.lower() == 'quit':
                print('Exiting program...')
                break
            else:
                print("Invalid input. Please try again.")

    except Exception as ex:
        print(ex)
def call_openai_model(messages, model, client):
    # In this sample, each file contains both the system and user messages
    # First, read them into variables, strip whitespace, then build the messages array
    file = open(file=messages, encoding="utf8")
    system_message = file.readline().split(':', 1)[1].strip()
    user_message = file.readline().split(':', 1)[1].strip()

    # Print the messages to the console
    print("System message: " + system_message)
    print("User message: " + user_message)

    # Format and send the request to the model
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]

    # Call the Azure OpenAI model
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
        max_tokens=800
    )

    if printFullResponse:
        print(response)

    print("Completion: \n\n" + response.choices[0].message.content + "\n")
if __name__ == '__main__':
    main()
from dotenv import load_dotenv
import os
import time
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt
# Import namespaces
import azure.ai.vision as sdk
def main():
    global cv_client

    try:
        # Get Configuration Settings
        load_dotenv()
        ai_endpoint = os.getenv('AI_SERVICE_ENDPOINT')
        ai_key = os.getenv('AI_SERVICE_KEY')

        # Authenticate Azure AI Vision client
        cv_client = sdk.VisionServiceOptions(ai_endpoint, ai_key)

        # Menu for text reading functions
        print('\n1: Use Read API for image (Lincoln.jpg)\n2: Read handwriting (Note.jpg)\nAny other key to quit\n')
        command = input('Enter a number:')
        if command == '1':
            image_file = os.path.join('images','Lincoln.jpg')
            GetTextRead(image_file)
        elif command == '2':
            image_file = os.path.join('images','Note.jpg')
            GetTextRead(image_file)

    except Exception as ex:
        print(ex)
def GetTextRead(image_file):
    print('\n')

    # Use Analyze image function to read text in image
    print('Reading text in {}\n'.format(image_file))

    analysis_options = sdk.ImageAnalysisOptions()

    features = analysis_options.features = (
        # Specify features to be retrieved
        sdk.ImageAnalysisFeature.TEXT
    )

    # Get image analysis
    image = sdk.VisionSource(image_file)
    image_analyzer = sdk.ImageAnalyzer(cv_client, image, analysis_options)
    result = image_analyzer.analyze()

    if result.reason == sdk.ImageAnalysisResultReason.ANALYZED:
        # Get text detected in the image
        if result.text is not None:
            print("\nText:")

            # Prepare image for drawing
            image = Image.open(image_file)
            fig = plt.figure(figsize=(image.width/100, image.height/100))
            plt.axis('off')
            draw = ImageDraw.Draw(image)
            color = 'cyan'

            for line in result.text.lines:
                # Return the text detected in the image
                print(line.content)

                # Return each line detected in the image and the position bounding box around each line
                drawLinePolygon = True
                r = line.bounding_polygon
                bounding_polygon = ((r[0], r[1]), (r[2], r[3]), (r[4], r[5]), (r[6], r[7]))

                # Return each word detected in the image and the position bounding box around each word with the confidence level of each word
                for word in line.words:
                    r = word.bounding_polygon
                    bounding_polygon = ((r[0], r[1]), (r[2], r[3]), (r[4], r[5]), (r[6], r[7]))
                    print(" Word: '{}', Bounding Polygon: {}, Confidence: {}".format(word.content, bounding_polygon, word.confidence))

                    # Draw word bounding polygon
                    drawLinePolygon = False
                    draw.polygon(bounding_polygon, outline=color, width=3)

                # Draw line bounding polygon
                if drawLinePolygon:
                    draw.polygon(bounding_polygon, outline=color, width=3)

            # Save image
            plt.imshow(image)
            plt.tight_layout(pad=0)
            outputfile = 'text.jpg'
            fig.savefig(outputfile)
            print('\n Results saved in', outputfile)
if __name__ == "__main__":
    main()
from dotenv import load_dotenv
from datetime import datetime
import os
# Import namespaces
import azure.cognitiveservices.speech as speech_sdk
from playsound import playsound
def main():
    try:
        global speech_config

        # Get Configuration Settings
        load_dotenv()
        ai_key = os.getenv('SPEECH_KEY')
        ai_region = os.getenv('SPEECH_REGION')

        # Configure speech service
        speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)
        print('Ready to use speech service in:', speech_config.region)

        # Get spoken input
        command = TranscribeCommand()
        if command.lower() == 'what time is it?':
            TellTime()

    except Exception as ex:
        print(ex)
def TranscribeCommand():
    command = ''

    # Configure speech recognition
    current_dir = os.getcwd()
    audioFile = current_dir + '\\time.wav'
    playsound(audioFile)
    audio_config = speech_sdk.AudioConfig(filename=audioFile)
    speech_recognizer = speech_sdk.SpeechRecognizer(speech_config, audio_config)

    # Process speech input
    speech = speech_recognizer.recognize_once_async().get()
    print(speech.text)
    if speech.reason == speech_sdk.ResultReason.RecognizedSpeech:
        command = speech.text
        print(command)
    else:
        print(speech.reason)
        if speech.reason == speech_sdk.ResultReason.Canceled:
            cancellation = speech.cancellation_details
            print(cancellation.reason)
            print(cancellation.error_details)

    # Return the command
    return command
def TellTime():
    now = datetime.now()
    response_text = 'The time is {}:{:02d}'.format(now.hour, now.minute)

    # Configure speech synthesis
    speech_config.speech_synthesis_voice_name = "en-GB-RyanNeural"
    speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config)

    # Synthesize spoken output
    speak = speech_synthesizer.speak_text_async(response_text).get()
    if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(speak.reason)

    # Print the response
    print(response_text)
if __name__ == "__main__":
    main()
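All of the scripts call load_dotenv() and read their settings with os.getenv, so each expects a .env file alongside it. A minimal sketch of that file, using the variable names that appear in the scripts above (the values shown are placeholders, not real endpoints or keys):
AI_SERVICE_ENDPOINT=https://<your-ai-services-resource>.cognitiveservices.azure.com/
AI_SERVICE_KEY=<your-ai-services-key>
AZURE_OAI_ENDPOINT=https://<your-azure-openai-resource>.openai.azure.com/
AZURE_OAI_KEY=<your-azure-openai-key>
AZURE_OAI_MODEL=<your-model-deployment-name>
SPEECH_KEY=<your-speech-resource-key>
SPEECH_REGION=<your-speech-resource-region>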