Last active
July 20, 2024 17:45
-
-
Save ThomasHineXYZ/36b61133a2c86ffa1422ee6c3063f786 to your computer and use it in GitHub Desktop.
Google Keep "Takeout" to Markdown Converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Google Keep "Takeout" to Markdown Converter
# This allows you to convert your Google Keep notes that are downloaded from
# Google's "Takeout" system. This works with Nextcloud's Notes system.
from datetime import datetime
import json
import os
import shutil

# Set the input_path to the `Keep` folder within your Google Takeout archive (extracted).
input_path = "/tmp/Takeout/Keep/"

# The file type that you'd like the export to be saved as.
output_extention = ".md"

# Where you'd like the converted files to be stored.
output_path = "/tmp/keep_converted/"

# These are the note colours, lifted directly from the Takeout's CSS
note_colours = {
    "blue": "#3FC3FF",
    "brown": "#D7CCC8",
    "cerulean": "#82B1FF",
    "gray": "#B8C4C9",
    "green": "#95D641",
    "orange": "#FF9B00",
    "pink": "#F8BBD0",
    "purple": "#B388FF",
    "red": "#FF6D3F",
    "teal": "#1CE8B5",
    "yellow": "#FFDA00",
}


def _clean_annotation_field(annotation, key):
    """Return ``annotation[key]`` flattened for use inside a Markdown link.

    Newlines are removed, double quotes become single quotes (the value may
    end up inside a double-quoted link title) and surrounding whitespace is
    stripped. A missing or empty field yields an empty string.
    """
    value = annotation.get(key) or ""
    return value.replace("\n", "").replace('"', "'").strip()


def _copy_attachment(attachment, source_dir, attachments_dir):
    """Copy one attachment into ``attachments_dir`` and return its bullet line.

    For ``image/jpeg`` attachments the listed ``filePath`` extension is not
    trusted: ``<stem>.jpg`` is tried first, then the listed name, then
    ``<stem>.jpeg``. Other attachment types are copied under their listed
    name when that file exists and are skipped otherwise.

    Returns:
        The Markdown list line linking to the copied file, or ``""`` when
        nothing was copied.

    Raises:
        FileNotFoundError: a JPEG attachment is listed but none of the
            candidate files exists in ``source_dir``.
    """
    listed_name = attachment.get("filePath")
    if not listed_name:
        return ""

    stem = os.path.splitext(listed_name)[0]
    is_jpeg = attachment.get("mimetype") == "image/jpeg"
    if is_jpeg:
        candidates = [f"{stem}.jpg", listed_name, f"{stem}.jpeg"]
    else:
        candidates = [listed_name]

    for candidate in candidates:
        source_file = os.path.join(source_dir, candidate)
        if os.path.isfile(source_file):
            shutil.copy2(source_file, os.path.join(attachments_dir, candidate))
            return f"* [{candidate}](ATTACHMENTS/{candidate})\n"

    if is_jpeg:
        # A missing image aborts the run, but with a specific exception type
        # instead of a re-wrapped bare Exception.
        raise FileNotFoundError(
            f"Attachment {listed_name!r} not found in {source_dir!r}"
        )
    return ""


def _render_json_note(json_data, source_dir, dest_dir):
    """Build the Markdown pieces for one parsed Keep JSON note.

    Args:
        json_data: The dict loaded from the note's ``.json`` file.
        source_dir: Folder the note (and its attachments) were read from.
        dest_dir: Output folder; attachments go to ``dest_dir/ATTACHMENTS``.

    Returns:
        A ``(title, colour, content, main_label, timestamp)`` tuple.
        ``colour`` is a hex string from ``note_colours`` or ``""``,
        ``main_label`` is the first label name or ``""``, and ``timestamp``
        is the edit time in whole seconds or ``None`` when the note has no
        ``userEditedTimestampUsec`` value.
    """
    # Fall back to the placeholder when the title is missing or blank
    title = (json_data.get("title") or "").strip() or "No Title"

    # Set the colour, if it isn't default
    colour = note_colours.get((json_data.get("color") or "").lower(), "")

    content = ""
    main_label = ""

    # Grab the content if there's some there
    if json_data.get("textContent"):
        content += json_data["textContent"].strip() + "\n"

    # List items
    for list_item in json_data.get("listContent") or []:
        item_text = list_item.get("text", "")
        if list_item.get("isChecked"):
            content += f"🗹 ~~{item_text}~~\n"
        else:
            content += f"☐ {item_text}\n"

    # Attachments
    attachments = json_data.get("attachments") or []
    if attachments:
        attachments_dir = os.path.join(dest_dir, "ATTACHMENTS")
        os.makedirs(attachments_dir, exist_ok=True)
        content = content.strip() + "\n\n## Attachments:\n"
        for attachment in attachments:
            content += _copy_attachment(attachment, source_dir, attachments_dir)

    # Annotations
    annotations = json_data.get("annotations") or []
    if annotations:
        content = content.strip() + "\n\n## Embeds:\n"
        for annotation in annotations:
            description = _clean_annotation_field(annotation, "description")
            source = _clean_annotation_field(annotation, "source")
            link_title = _clean_annotation_field(annotation, "title")
            url = _clean_annotation_field(annotation, "url")
            content += f"* {source.title()}: "
            content += f'[{link_title}]({url} "{description}")\n'

    # Labels
    labels = json_data.get("labels") or []
    if labels:
        content = content.strip() + "\n\n## Labels:\n"
        for label in labels:
            label_name = label.get("name", "")
            # The first label decides which sub-folder the note is filed in
            if not main_label:
                main_label = label_name
            content += f"* {label_name}\n"

    # Round and convert the value to an int, since we don't care about
    # anything smaller than seconds
    timestamp = None
    if json_data.get("userEditedTimestampUsec") is not None:
        timestamp = int(round(json_data["userEditedTimestampUsec"] / 1000000))

    # Extra Values; fields absent from the JSON are left out instead of
    # raising KeyError
    values = ""
    if "color" in json_data:
        values += f"* Colour: {json_data['color']}\n"
    for flag in ("isArchived", "isPinned", "isTrashed"):
        if flag in json_data:
            values += f"* {flag}: {json_data[flag]}\n"
    if timestamp is not None:
        converted_timestamp = datetime.fromtimestamp(timestamp).isoformat()
        values += f"* Last Modified: {converted_timestamp}\n"
    if values:
        content = content.strip() + "\n\n## Values:\n" + values

    return title, colour, content, main_label, timestamp


def convert_note(file, source_dir=None, dest_dir=None, extension=None):
    """Convert one Takeout file to a Markdown note.

    Args:
        file: File name (not a full path) inside ``source_dir``.
        source_dir: Folder to read from; defaults to ``input_path``.
        dest_dir: Folder to write to; defaults to ``output_path``.
        extension: Extension of the written file; defaults to
            ``output_extention``.

    Returns:
        The path of the written file, or ``None`` when ``file`` is neither a
        ``.txt`` nor a ``.json`` file and was skipped.
    """
    source_dir = input_path if source_dir is None else source_dir
    dest_dir = output_path if dest_dir is None else dest_dir
    extension = output_extention if extension is None else extension

    # Split the file name up in to the name and the extension
    file_name, file_extension = os.path.splitext(file)
    file_extension = file_extension.lower()
    source_file = os.path.join(source_dir, file)

    colour = ""
    main_label = ""
    timestamp = None

    if file_extension == ".txt":
        # Undecodable bytes are replaced so one odd file cannot stop the run
        with open(source_file, "r", encoding="utf-8", errors="replace") as text_file:
            content = text_file.read()
        title = file_name
    elif file_extension == ".json":
        with open(source_file, "r", encoding="utf-8") as json_file:
            json_data = json.load(json_file)
        title, colour, content, main_label, timestamp = _render_json_note(
            json_data, source_dir, dest_dir
        )
    else:  # If it's any other file type, just skip it
        return None

    # Plain-text notes (and JSON notes without an edit time) carry no
    # timestamp of their own, so reuse the source file's modification time.
    if timestamp is None:
        timestamp = int(os.path.getmtime(source_file))

    # Do some final clean up of the title and content, just in case
    title = title.strip()
    content = content.strip()

    # Now put together the new markdown file
    document = f"{title}\n" + "-" * len(title) + "\n"
    document += f"Colour: {colour}\n\n" if colour else "\n"
    document += content + "\n"

    # Notes with a label are filed in a folder named after the first label;
    # "/" is replaced so the label cannot introduce extra path components.
    target_dir = dest_dir
    if main_label:
        target_dir = os.path.join(dest_dir, main_label.replace("/", "-"))

    # Create the output folder if it doesn't exist
    os.makedirs(target_dir, exist_ok=True)

    new_file = os.path.join(target_dir, f"{file_name}{extension}")
    # Explicit UTF-8: the platform default encoding may be unable to encode
    # the checkbox characters or the note text (UnicodeEncodeError).
    with open(new_file, "w", encoding="utf-8") as markdown_file:
        markdown_file.write(document)

    # Set the modified time on them to their old date
    os.utime(new_file, (timestamp, timestamp))
    return new_file


def main():
    """Convert every supported file found in ``input_path``."""
    # Grab a list of the files
    for file in sorted(os.listdir(input_path)):
        new_file = convert_note(file)
        if new_file:
            print(f"Converted {os.path.join(input_path, file)} to {new_file}")


if __name__ == "__main__":
    main()
Hello, is there a way to support Unicode? I got UnicodeEncodeError. Thanks
I've just got it working; you have to modify line 191
from: f = open(new_file, "w")
to: f = open(new_file, 'w', encoding="utf-8")
(Be careful with the quote marks: you have to replace them with single ones.)
Hello, is there a way to support Unicode? I got UnicodeEncodeError. Thanks
I've just got it working; you have to modify line 191 from:
f = open(new_file, "w")
to: f = open(new_file, 'w', encoding="utf-8")
(Be careful with the quote marks: you have to replace them with single ones.)
Thanks for your effort, I'll try it out!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello, is there a way to support Unicode? I got UnicodeEncodeError.
Thanks