Created
March 23, 2023 21:10
-
-
Save hwchase17/1429e54879f0249f0a258382d8bd744c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# STEP 1: Load
# Load documents using LangChain's DocumentLoaders
# This is from https://langchain.readthedocs.io/en/latest/modules/document_loaders/examples/csv.html
from langchain.document_loaders.csv_loader import CSVLoader

# `data` holds whatever the loader returns for the example CSV; it is
# passed to write_json() in STEP 2.
loader = CSVLoader(file_path='./example_data/mlb_teams_2012.csv')
data = loader.load()

# STEP 2: Convert
# Convert Document to format expected by https://github.com/openai/chatgpt-retrieval-plugin
from typing import List
from langchain.docstore.document import Document
import json
def write_json(path: str, documents: List[Document]) -> None:
    """Write *documents* to *path* as a JSON array of ``{"text": ...}`` objects.

    Only each document's ``page_content`` is exported; any other attribute
    of the document is ignored.

    Args:
        path: Destination file. It is created, or overwritten if it exists.
        documents: Documents whose ``page_content`` becomes the ``"text"``
            value of one JSON object each, in the same order.
    """
    results = [{"text": doc.page_content} for doc in documents]
    # Explicit encoding so the result does not depend on the platform's
    # locale default. json.dump escapes non-ASCII characters by default,
    # so the bytes written are the same as before on any platform.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
write_json("foo.json", data)

# STEP 3: Use
# Ingest this as you would any other json file in https://github.com/openai/chatgpt-retrieval-plugin/tree/main/scripts/process_json
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment