Skip to content

Instantly share code, notes, and snippets.

@tomaarsen
Created October 15, 2024 12:30
Show Gist options
  • Save tomaarsen/4b00b0e3be8884efa64cfab9230b161f to your computer and use it in GitHub Desktop.
Save tomaarsen/4b00b0e3be8884efa64cfab9230b161f to your computer and use it in GitHub Desktop.
Export Sentence Transformer models to ONNX (+ optimization, quantization) & OpenVINO
# requires sentence_transformers>=3.2.0
from sentence_transformers import SentenceTransformer, export_optimized_onnx_model, export_dynamic_quantized_onnx_model
# Checkpoint to export to ONNX (plus optimized and quantized variants) and to
# OpenVINO.
model_id = "mixedbread-ai/mxbai-embed-large-v1"
# Local directory that receives every exported artifact. The "/" in the model
# id is replaced so the id maps onto a single folder name.
output_dir = model_id.replace("/", "-")

# Load the model with the ONNX backend, requesting an export, and save that
# base ONNX model first; the variants below are written to the same directory.
onnx_model = SentenceTransformer(model_id, backend="onnx", model_kwargs={"export": True})
onnx_model.save_pretrained(output_dir)

# One optimized ONNX variant per optimization level.
# NOTE(review): "O4" is described upstream as fp16 / GPU-only — confirm it is
# expected to succeed on the machine running this script.
for optimization_config in ["O1", "O2", "O3", "O4"]:
    export_optimized_onnx_model(
        onnx_model,
        optimization_config=optimization_config,
        model_name_or_path=output_dir,
    )

# One dynamically quantized ONNX variant per quantization configuration.
for quantization_config in ["arm64", "avx2", "avx512", "avx512_vnni"]:
    export_dynamic_quantized_onnx_model(
        onnx_model,
        quantization_config=quantization_config,
        model_name_or_path=output_dir,
    )

# Finally, load the same checkpoint with the OpenVINO backend and save it into
# the same output directory.
openvino_model = SentenceTransformer(model_id, backend="openvino")
openvino_model.save_pretrained(output_dir)
# requires sentence_transformers>=3.2.0
from sentence_transformers import SentenceTransformer, export_optimized_onnx_model, export_dynamic_quantized_onnx_model
# Checkpoint to export to ONNX (plus optimized and quantized variants) and to
# OpenVINO.
model_id = "mixedbread-ai/mxbai-embed-large-v1"
# Hub repository that receives the ONNX and OpenVINO models.
output_model_id = "tomaarsen/mxbai-embed-large-v1-exported"
# Push directly (False) or open a pull request (True). A PR is useful for
# reviewing the changes before merging, or if you don't have write access.
create_pr = False

# Load the model with the ONNX backend, requesting an export, and push that
# base ONNX model first. exist_ok=True is passed so an already existing target
# repository is accepted.
onnx_model = SentenceTransformer(model_id, backend="onnx", model_kwargs={"export": True})
onnx_model.push_to_hub(output_model_id, exist_ok=True, create_pr=create_pr)

# One optimized ONNX variant per optimization level, each pushed to the Hub.
# NOTE(review): "O4" is described upstream as fp16 / GPU-only — confirm it is
# expected to succeed on the machine running this script.
for optimization_config in ["O1", "O2", "O3", "O4"]:
    export_optimized_onnx_model(
        onnx_model,
        optimization_config=optimization_config,
        model_name_or_path=output_model_id,
        push_to_hub=True,
        create_pr=create_pr,
    )

# One dynamically quantized ONNX variant per quantization configuration, each
# pushed to the Hub.
for quantization_config in ["arm64", "avx2", "avx512", "avx512_vnni"]:
    export_dynamic_quantized_onnx_model(
        onnx_model,
        quantization_config=quantization_config,
        model_name_or_path=output_model_id,
        push_to_hub=True,
        create_pr=create_pr,
    )

# Finally, load the same checkpoint with the OpenVINO backend and push it to
# the same repository.
openvino_model = SentenceTransformer(model_id, backend="openvino")
openvino_model.push_to_hub(output_model_id, exist_ok=True, create_pr=create_pr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment