Last active
June 5, 2024 23:54
-
-
Save ThioJoe/e84c4e649857bf0a290b49a2caa670f7 to your computer and use it in GitHub Desktop.
Basic Whisper Transcription Script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Basic script for using the OpenAI Whisper model to transcribe a video file. You can uncomment whichever model you want to use. | |
# Author: ThioJoe ( https://github.com/ThioJoe ) | |
# Required third party packages: whisper | |
# See instructions for setup here: https://github.com/openai/whisper#setup | |
# - You can use the below command to pull the repo and install dependencies, then just put this script in the repo directory: | |
# pip install git+https://github.com/openai/whisper.git | |
import whisper | |
import io | |
import time | |
import os | |
import json | |
import pathlib | |
# Choose model to use by uncommenting | |
#modelName = "tiny.en" | |
#modelName = "base.en" | |
#modelName = "small.en" | |
#modelName = "medium.en" | |
modelName = "large-v2" | |
# Other Variables | |
exportTimestampData = False # (bool) Whether to export the segment data to a json file. Will include word level timestamps if word_timestamps is True. | |
outputFolder = "Output" | |
# ----- Select variables for transcribe method ----- | |
# audio: path to audio file | |
verbose = False # (bool): Whether to display the text being decoded to the console. If True, displays all the details, If False, displays minimal details. If None, does not display anything | |
language="english" # Language of audio file | |
word_timestamps=False # (bool): Extract word-level timestamps using the cross-attention pattern and dynamic time warping, and include the timestamps for each word in each segment. | |
#initial_prompt="" # (optional str): Optional text to provide as a prompt for the first window. This can be used to provide, or "prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns to make it more likely to predict those word correctly. | |
# ------------------------------------------------------------------------- | |
print(f"Using Model: {modelName}") | |
filePath = input("Path to File Being Transcribed: ") | |
filePath = filePath.strip("\"") | |
if not os.path.exists(filePath): | |
print("Problem Getting File...") | |
input("Press Enter to Exit...") | |
exit() | |
# If output folder does not exist, create it | |
if not os.path.exists(outputFolder): | |
os.makedirs(outputFolder) | |
print("Created Output Folder.\n") | |
# Get filename stem using pathlib (filename without extension) | |
fileNameStem = pathlib.Path(filePath).stem | |
resultFileName = f"{fileNameStem}.txt" | |
jsonFileName = f"{fileNameStem}.json" | |
model = whisper.load_model(modelName) | |
start = time.time() | |
# --------------------------------------------------- | |
result = model.transcribe(audio=filePath, language=language, word_timestamps=word_timestamps, verbose=verbose) | |
# --------------------------------------------------- | |
end = time.time() | |
elapsed = float(end - start) | |
# Save transcription text to file | |
print("\nWriting transcription to file...") | |
with open(os.path.join(outputFolder, resultFileName), "w", encoding="utf-8") as file: | |
file.write(result["text"]) | |
print("Finished writing transcription file.") | |
# Save the segments data to json file | |
#if word_timestamps == True: | |
if exportTimestampData == True: | |
print("\nWriting segment data to file...") | |
with open(os.path.join(outputFolder, jsonFileName), "w", encoding="utf-8") as file: | |
segmentsData = result["segments"] | |
json.dump(segmentsData, file, indent=4) | |
print("Finished writing segment data file.") | |
elapsedMinutes = str(round(elapsed/60, 2)) | |
print(f"\nElapsed Time With {modelName} Model: {elapsedMinutes} Minutes") | |
input("Press Enter to exit...") | |
exit() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uploading I'm a Pretty Princess (Ai Edition) 👸🏼#ai #chatgpt #aiart.mp4…