Skip to content

Instantly share code, notes, and snippets.

@bigsnarfdude
Created January 14, 2025 18:38
Show Gist options
  • Save bigsnarfdude/62ebb0f5c0490f4af59211e1359fa1bb to your computer and use it in GitHub Desktop.
metagene.py
"""Generate a continuation of a nucleotide sequence with METAGENE-1.

Downloads the metagene-ai/METAGENE-1 causal LM from the Hugging Face Hub,
feeds it a raw DNA string, and prints the model's generated continuation.
"""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model (bfloat16 halves memory vs. float32).
tokenizer = AutoTokenizer.from_pretrained("metagene-ai/METAGENE-1")
model = AutoModelForCausalLM.from_pretrained("metagene-ai/METAGENE-1", torch_dtype=torch.bfloat16)
model.eval()  # disable dropout/training-mode layers for inference

# Example input sequence (raw nucleotide string).
input_sequence = "TCACCGTTCTACAATCCCAAGCTGGAGTCAAGCTCAACAGGGTCTTC"

# Tokenize without special tokens so no [EOS] is appended — an [EOS] at the
# end of the prompt would make generation stop immediately.
input_tokens = tokenizer.encode(input_sequence, return_tensors="pt", add_special_tokens=False)

# Generate a continuation. `max_new_tokens` bounds only the *generated*
# tokens; the original `max_length=32` counted the prompt too, so a prompt
# longer than 32 tokens could leave no room for any new output.
with torch.inference_mode():  # no autograd bookkeeping needed at inference
    generated_tokens = model.generate(input_tokens, max_new_tokens=32)

# Decode and strip tokenizer artifacts (inter-token spaces and "_" markers).
generated_sequence = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
generated_sequence = generated_sequence.replace(" ", "").replace("_", "")

# Generated output: a Hexamita inflata 5.8S ribosomal RNA gene sequence
print(f"🔬 Generated Sequence:\n{generated_sequence}")
# TCACCGTTCTACAATCCCAAGCTGGAGTCAAGCTCAACAGGGTCTTCTTGCCCCGCTGAGGGTTACACTCGCCCGTTCCCGAGTCTGTGGTTTCGCGAAGATATGACCAGGGACAGTAAGAACC
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment