Created
November 14, 2024 10:33
-
-
Save gianpaj/8c85ad0a6c3e4d1d72c376b50e2a6d3b to your computer and use it in GitHub Desktop.
Send a prompt with an image to the Ollama server for vision models like Llama 3.2-vision (requires only the `requests` package)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import base64
import json
import sys
from pathlib import Path

import requests
def image_to_base64(image_path):
    """Read a file and return its contents as a base64-encoded string.

    Args:
        image_path: Path of the file to read (anything ``open`` accepts).

    Returns:
        The file's raw bytes, base64-encoded and decoded to a ``str``.

    On failure to read the file, an error message is written to stderr and
    the process exits with status 1 (``SystemExit``).
    """
    try:
        with open(image_path, "rb") as image_file:
            raw_bytes = image_file.read()
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; ValueError covers malformed
        # paths (e.g. an embedded NUL byte). Anything else is a programming
        # error and should surface with a traceback rather than be swallowed.
        print(f"Error converting image to base64: {e}", file=sys.stderr)
        sys.exit(1)
    return base64.b64encode(raw_bytes).decode("utf-8")
def make_ollama_request(base64_image, prompt, model, api_url, timeout=(10, None)):
    """POST a single non-streaming prompt-plus-image request and return the JSON reply.

    Args:
        base64_image: Base64-encoded image, sent as the only entry of "images".
        prompt: Text prompt sent in the "prompt" field.
        model: Model name sent in the "model" field.
        api_url: URL the JSON payload is POSTed to.
        timeout: Forwarded to ``requests.post``. The default waits at most
            10 seconds to connect but places no limit on the read, since
            generation can legitimately take a long time.

    Returns:
        The response body parsed from JSON.

    On a transport/HTTP error or a body that is not valid JSON, an error
    message is written to stderr and the process exits with status 1.
    """
    headers = {
        'Content-Type': 'application/json'
    }
    payload = {
        "model": model,
        "prompt": prompt,
        "images": [base64_image],
        "stream": False
    }
    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error making API request: {e}", file=sys.stderr)
        sys.exit(1)
    except ValueError as e:
        # Older versions of requests raise a plain ValueError (not a
        # RequestException) when the body is not valid JSON.
        print(f"Error making API request: invalid JSON in response: {e}", file=sys.stderr)
        sys.exit(1)
def main(argv=None):
    """Command-line entry point: send an image and prompt to Ollama, print the reply.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default), argparse reads them from ``sys.argv``.

    Progress messages, the JSON response and timing information go to stdout.
    If the image path is missing or is not a regular file, an error message
    is written to stderr and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description='Convert image to base64 and make Ollama API request')
    parser.add_argument('--image', required=True, help='Path to the image file')
    parser.add_argument('--prompt', required=True, help='Prompt for the model')
    parser.add_argument('--model', default='llama3.2-vision', help='Model to use (default: llama3.2-vision)')
    parser.add_argument('--api-url', default='http://localhost:11434/api/generate',
                        help='Ollama API URL (default: http://localhost:11434/api/generate)')
    args = parser.parse_args(argv)

    # Validate the image path up front so the user gets a precise message.
    image_path = Path(args.image)
    if not image_path.exists():
        print(f"Error: Image file '{args.image}' does not exist", file=sys.stderr)
        sys.exit(1)
    if not image_path.is_file():
        # exists() alone also accepts directories, which cannot be read as an image.
        print(f"Error: '{args.image}' is not a file", file=sys.stderr)
        sys.exit(1)

    print("Converting image to base64...")
    base64_image = image_to_base64(args.image)

    print("Making API request to Ollama...")
    response = make_ollama_request(base64_image, args.prompt, args.model, args.api_url)

    print("\nResponse from Ollama:")
    print(json.dumps(response, indent=2))

    # Timing fields are only reported when the response carries them.
    if 'total_duration' not in response:
        return

    timing_fields = (
        ("Total Duration", "total_duration"),
        ("Load Duration", "load_duration"),
        ("Prompt Evaluation Duration", "prompt_eval_duration"),
        ("Evaluation Duration", "eval_duration"),
    )
    print("\nTiming Information:")
    for label, key in timing_fields:
        # Dividing by 1e9 converts the reported value to seconds.
        seconds = response.get(key, 0) / 1e9
        print(f"{label}: {seconds:.2f} seconds")
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment