Last active
November 22, 2023 19:27
-
-
Save sammcj/31429a4c3836807d6e70363c551c7ce3 to your computer and use it in GitHub Desktop.
describe_image.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# describe_image.sh — describe an image using a local LLaVA (llama.cpp) model
# and copy the description to the macOS clipboard.
# Usage: describe_image.sh <image-path>
# It can be integrated with the Finder's "Open With" menu.
# Example models: https://huggingface.co/mys/ggml_llava-v1.5-7b/tree/main
set -euo pipefail

# Path to AI llama/llava tool binary
# LLAVA_BIN="${HOME}/git/llama.cpp/build/bin/llava"
LLAVA_BIN="/usr/local/bin/llava"

# Directory containing the LLM models (no trailing slash; it is joined with '/')
MODELS_DIR="/Volumes/USB-SATA/LLM/models"

# Specific models to be used
MODEL="ggml-model-q4_k.gguf"
MMPROJ="mmproj-model-f16.gguf"

# Get the input image path from the first argument; abort with usage if missing.
IMAGE="${1:?Usage: ${0##*/} <image-path>}"
if [[ ! -f "$IMAGE" ]]; then
  printf 'Error: image not found: %s\n' "$IMAGE" >&2
  exit 1
fi

# Set tunable variables
TOKENS=256       # max tokens to generate
THREADS=8        # CPU threads
MTEMP=0.1        # low temperature -> more deterministic descriptions
MPROMPT="Describe the image in as much detail as possible."
# MPROMPT="Describe the image in as much detail as possible, I will use this description in the text2image tool. Mention a style if possible."
MCONTEXT=2048    # context window size
GPULAYERS=50     # layers offloaded to the GPU

# Run llama/llava tool to describe the image.
# All paths and values are quoted so paths with spaces work correctly.
OUTPUT="$("${LLAVA_BIN}" \
  -m "${MODELS_DIR}/${MODEL}" \
  --mmproj "${MODELS_DIR}/${MMPROJ}" \
  --threads "${THREADS}" \
  --temp "${MTEMP}" \
  --prompt "${MPROMPT}" \
  --image "${IMAGE}" \
  --n-gpu-layers "${GPULAYERS}" \
  --ctx-size "${MCONTEXT}" \
  --n-predict "${TOKENS}")"

# Copy output to clipboard (printf avoids echo's option/backslash pitfalls)
printf '%s\n' "$OUTPUT" | pbcopy

# Speak an audible notification when the description is done
say "Done."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment