Skip to content

Instantly share code, notes, and snippets.

@gianpaj
Created November 14, 2024 10:33
Show Gist options
  • Save gianpaj/8c85ad0a6c3e4d1d72c376b50e2a6d3b to your computer and use it in GitHub Desktop.
Save gianpaj/8c85ad0a6c3e4d1d72c376b50e2a6d3b to your computer and use it in GitHub Desktop.
Send a prompt with an image to the Ollama server for vision models like Llama 3.2-vision (requires only the `requests` package)
import argparse
import base64
import requests
import json
import sys
from pathlib import Path
def image_to_base64(image_path):
    """Read an image file and return its contents as a base64 string.

    Args:
        image_path: Path to the image file, in any form accepted by
            ``open()``.

    Returns:
        The file's raw bytes, base64-encoded, as a ``str``.

    If the file cannot be read (missing, a directory, permission denied,
    malformed path, ...) the error is reported on stderr and the process
    exits with status 1.
    """
    try:
        with open(image_path, "rb") as image_file:
            raw_bytes = image_file.read()
    except (OSError, ValueError) as e:
        # OSError covers file-system failures; ValueError covers malformed
        # paths such as ones containing an embedded NUL byte. Anything else
        # is a programming error and should surface with a traceback.
        print(f"Error converting image to base64: {e}", file=sys.stderr)
        sys.exit(1)
    return base64.b64encode(raw_bytes).decode('utf-8')
def make_ollama_request(base64_image, prompt, model, api_url, timeout=None):
    """Send a non-streaming generate request with one image to Ollama.

    Args:
        base64_image: The image, already base64-encoded as a string.
        prompt: Text prompt sent alongside the image.
        model: Name of the model to run.
        api_url: Full URL of the endpoint to POST to.
        timeout: Optional timeout in seconds forwarded to ``requests.post``.
            The default ``None`` waits indefinitely, matching the previous
            behaviour.

    Returns:
        The decoded JSON body of the response.

    On any request failure (connection error, non-2xx status, timeout,
    undecodable body) the error is reported on stderr and the process exits
    with status 1.
    """
    headers = {
        'Content-Type': 'application/json'
    }
    payload = {
        "model": model,
        "prompt": prompt,
        "images": [base64_image],
        "stream": False
    }
    try:
        response = requests.post(
            api_url, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError is included because older requests releases raise a
        # plain JSON decode error (a ValueError) from response.json().
        print(f"Error making API request: {e}", file=sys.stderr)
        # Surface the server's own error body when there is one; the status
        # line alone rarely says what went wrong. Compare against None
        # explicitly: a Response is falsy for 4xx/5xx statuses.
        error_response = getattr(e, "response", None)
        if error_response is not None and error_response.text:
            print(f"Server response: {error_response.text}", file=sys.stderr)
        sys.exit(1)
def main():
    """Command-line entry point: send an image and a prompt to Ollama.

    Parses ``--image``, ``--prompt``, ``--model`` and ``--api-url``, checks
    that the image path refers to an existing regular file, base64-encodes
    it, sends the request, and prints the JSON response followed by timing
    information when the response includes ``total_duration``.

    Validation failures are reported on stderr and exit with status 1.
    """
    parser = argparse.ArgumentParser(description='Convert image to base64 and make Ollama API request')
    parser.add_argument('--image', required=True, help='Path to the image file')
    parser.add_argument('--prompt', required=True, help='Prompt for the model')
    parser.add_argument('--model', default='llama3.2-vision', help='Model to use (default: llama3.2-vision)')
    parser.add_argument('--api-url', default='http://localhost:11434/api/generate',
                        help='Ollama API URL (default: http://localhost:11434/api/generate)')
    args = parser.parse_args()

    # Validate image path
    image_path = Path(args.image)
    if not image_path.exists():
        print(f"Error: Image file '{args.image}' does not exist", file=sys.stderr)
        sys.exit(1)
    if not image_path.is_file():
        # exists() is also true for directories, which cannot be read as an
        # image; reject them here with a precise message.
        print(f"Error: '{args.image}' is not a file", file=sys.stderr)
        sys.exit(1)

    # Convert image to base64
    print("Converting image to base64...")
    base64_image = image_to_base64(args.image)

    # Make API request
    print("Making API request to Ollama...")
    response = make_ollama_request(base64_image, args.prompt, args.model, args.api_url)

    # Print response
    print("\nResponse from Ollama:")
    print(json.dumps(response, indent=2))

    # Print response times; only attempted when total_duration is present.
    if 'total_duration' in response:
        timing_fields = (
            ("Total Duration", "total_duration"),
            ("Load Duration", "load_duration"),
            ("Prompt Evaluation Duration", "prompt_eval_duration"),
            ("Evaluation Duration", "eval_duration"),
        )
        print("\nTiming Information:")
        for label, key in timing_fields:
            seconds = response.get(key, 0) / 1e9  # Convert to seconds
            print(f"{label}: {seconds:.2f} seconds")
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment