-
-
Save ThomasHineXYZ/36b61133a2c86ffa1422ee6c3063f786 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Google Keep "Takeout" to Markdown Converter
# This converts the Google Keep notes downloaded through Google's "Takeout"
# system into Markdown files. The output works with Nextcloud's Notes system.
from datetime import datetime
import json
import os
import shutil
# Set the input_path to the `Keep` folder within your (extracted) Google
# Takeout archive. Written with a trailing slash.
input_path = "/tmp/Takeout/Keep/"

# The file type (extension, including the leading dot) that each converted
# note is saved as.
output_extention = ".md"

# Where the converted files are stored. Written with a trailing slash.
output_path = "/tmp/keep_converted/"

# Hex values for the named note colours, lifted directly from the Takeout's
# CSS. Keys are lower-case colour names.
note_colours = {
    "blue": "#3FC3FF",
    "brown": "#D7CCC8",
    "cerulean": "#82B1FF",
    "gray": "#B8C4C9",
    "green": "#95D641",
    "orange": "#FF9B00",
    "pink": "#F8BBD0",
    "purple": "#B388FF",
    "red": "#FF6D3F",
    "teal": "#1CE8B5",
    "yellow": "#FFDA00",
}
def _clean_annotation_field(value):
    """Flatten an annotation field so it fits inside a one-line Markdown link."""
    # Newlines would break the list item, and double quotes would end the
    # link's quoted hover text early, so both are neutralised.
    return value.replace("\n", "").replace('"', "'").strip()


def _start_section(content, heading):
    """Trim trailing whitespace from *content* and open a `## heading:` section."""
    return content.strip() + f"\n\n## {heading}:\n"


def _copy_attachments(attachments, input_path, output_path):
    """Copy a note's attachments and return the Markdown list linking to them.

    Files are copied into `<output_path>/ATTACHMENTS/`, which is created on
    demand. JPEG attachments are always looked up with a `.jpg` extension,
    whatever extension the JSON records. Other attachment types are copied
    under their recorded name and skipped (with a message) when the file is
    not present in the export.

    Raises:
        OSError: if a JPEG attachment cannot be copied.
    """
    attachments_dir = os.path.join(output_path, "ATTACHMENTS")
    os.makedirs(attachments_dir, exist_ok=True)

    lines = []
    for attachment in attachments:
        recorded_name = attachment['filePath']
        if attachment['mimetype'] == "image/jpeg":
            # The export mixes up the JPG and JPEG extensions, so the
            # recorded extension is replaced with `.jpg`.
            attachment_name = f"{os.path.splitext(recorded_name)[0]}.jpg"
        else:
            attachment_name = recorded_name
            if not os.path.isfile(os.path.join(input_path, attachment_name)):
                print(f"Skipping missing attachment {attachment_name}")
                continue

        shutil.copy2(
            os.path.join(input_path, attachment_name),
            os.path.join(attachments_dir, attachment_name),
        )
        lines.append(f"* [{attachment_name}](ATTACHMENTS/{attachment_name})\n")

    return "".join(lines)


def _convert_json_note(json_data, note_colours, input_path, output_path):
    """Turn one parsed Keep JSON note into its Markdown parts.

    Args:
        json_data: The note as loaded from its Takeout `.json` file.
        note_colours: Mapping of lower-case colour names to hex values.
        input_path: Export folder the note's attachments are read from.
        output_path: Folder that receives the `ATTACHMENTS/` directory.

    Returns:
        A `(title, colour, content, main_label, timestamp)` tuple, where
        `colour` is a hex value or "" for an unknown/default colour,
        `main_label` is the first label's name or "", and `timestamp` is the
        note's last-edit time in whole seconds since the epoch.
    """
    # A missing, empty, or whitespace-only title falls back to the placeholder.
    title = (json_data.get('title') or "").strip() or "No Title"

    # Only colours present in the lookup table are reported in the header.
    colour = note_colours.get((json_data.get('color') or "").lower(), "")

    content = ""
    if json_data.get('textContent'):
        content += json_data['textContent'].strip() + "\n"

    # List items
    for list_item in json_data.get('listContent') or []:
        if list_item['isChecked']:
            content += f"🗹 ~~{list_item['text']}~~\n"
        else:
            content += f"☐ {list_item['text']}\n"

    # Attachments
    if json_data.get('attachments'):
        content = _start_section(content, "Attachments")
        content += _copy_attachments(
            json_data['attachments'], input_path, output_path
        )

    # Annotations
    if json_data.get('annotations'):
        content = _start_section(content, "Embeds")
        for annotation in json_data['annotations']:
            annotation_description = _clean_annotation_field(annotation['description'])
            annotation_source = _clean_annotation_field(annotation['source'])
            annotation_title = _clean_annotation_field(annotation['title'])
            annotation_url = _clean_annotation_field(annotation['url'])
            content += f"* {annotation_source.title()}: "
            content += f"[{annotation_title}]({annotation_url} \"{annotation_description}\")\n"

    # Labels
    main_label = ""
    if json_data.get('labels'):
        content = _start_section(content, "Labels")
        for label in json_data['labels']:
            # The first label decides which sub-folder the note is filed in.
            if not main_label:
                main_label = label['name']
            content += f"* {label['name']}\n"

    # Round and convert the value to an int, since we don't care about
    # anything smaller than seconds.
    timestamp = int(round(json_data['userEditedTimestampUsec'] / 1000000))
    converted_timestamp = datetime.fromtimestamp(timestamp).isoformat()

    # Extra Values
    content = _start_section(content, "Values")
    content += f"* Colour: {json_data['color']}\n"
    content += f"* isArchived: {json_data['isArchived']}\n"
    content += f"* isPinned: {json_data['isPinned']}\n"
    content += f"* isTrashed: {json_data['isTrashed']}\n"
    content += f"* Last Modified: {converted_timestamp}\n"

    return title, colour, content, main_label, timestamp


def convert_notes(input_path, output_path, output_extention, note_colours):
    """Convert every `.txt` and `.json` note in *input_path* to Markdown.

    Each note becomes `<output_path>/[<first label>/]<name><output_extention>`,
    written as UTF-8, with its modification time set to the note's last-edit
    time. Plain-text notes carry no recorded edit time, so the source file's
    own modification time is used for them. Any other file type is skipped.

    Args:
        input_path: The `Keep` folder of an extracted Takeout archive.
        output_path: Folder that receives the converted notes.
        output_extention: Extension (with leading dot) for converted files.
        note_colours: Mapping of lower-case colour names to hex values.

    Returns:
        The list of paths written, in processing (sorted file name) order.
    """
    written = []

    # Sorted so the processing order does not depend on the file system.
    for entry in sorted(os.listdir(input_path)):
        file_name, file_extension = os.path.splitext(entry)
        source_path = os.path.join(input_path, entry)

        colour = ""
        main_label = ""

        if file_extension.lower() == ".txt":
            with open(source_path, 'r', encoding='utf-8') as text_file:
                content = text_file.read()
            title = file_name
            # No edit time is stored for text notes; reuse the file's mtime
            # rather than a stale value left over from a previous note.
            timestamp = int(os.path.getmtime(source_path))
        elif file_extension.lower() == ".json":
            with open(source_path, 'r', encoding='utf-8') as json_file:
                json_data = json.load(json_file)
            title, colour, content, main_label, timestamp = _convert_json_note(
                json_data, note_colours, input_path, output_path
            )
        else:  # If it's any other file type, just skip it
            continue

        # Do some final clean up of the title and content, just in case
        title = title.strip()
        content = content.strip()

        # Now put together the new markdown file
        colour_line = f"Colour: {colour}\n\n" if colour else "\n"
        document = f"{title}\n{'-' * len(title)}\n{colour_line}{content}\n"

        # Notes with a label are filed in a folder named after the first one;
        # "/" is replaced so the label cannot introduce extra path levels.
        target_dir = output_path
        if main_label:
            target_dir = os.path.join(output_path, main_label.replace("/", "-"))
        os.makedirs(target_dir, exist_ok=True)

        new_file = os.path.join(target_dir, f"{file_name}{output_extention}")

        # Explicit UTF-8: the checkbox glyphs cannot be encoded by many
        # platform-default encodings.
        with open(new_file, "w", encoding="utf-8") as markdown_file:
            markdown_file.write(document)

        # Set the modified time on them to their old date
        os.utime(new_file, (timestamp, timestamp))

        print(f"Converted {source_path} to {new_file}")
        written.append(new_file)

    return written


if __name__ == "__main__":
    convert_notes(input_path, output_path, output_extention, note_colours)
This is great! Thank you for sharing.
Made a quick change: added line 191,
os.utime(new_file,(timestamp, timestamp))
, to make Nextcloud display the notes by year in the left sidebar of the Notes app.
Done.
I didn't see your comment until today. Sorry about that.
Hello, is there a way to support Unicode? I got UnicodeEncodeError.
Thanks
Hello, is there a way to support Unicode? I got UnicodeEncodeError. Thanks
I've just got it working. You have to modify line 191
from: f = open(new_file, "w")
to: f = open(new_file, 'w',encoding="utf-8")
(Be careful with the quote marks; you have to replace them with the single ones.)
Hello, is there a way to support Unicode? I got UnicodeEncodeError. Thanks
I've just got it working. You have to modify line 191 from:
f = open(new_file, "w")
to: f = open(new_file, 'w',encoding="utf-8")
(Be careful with the quote marks; you have to replace them with the single ones.)
Thanks for your effort, I'll try it out!
This is great! Thank you for sharing.
Made a quick change: added line 191,
os.utime(new_file,(timestamp, timestamp))
, to make Nextcloud display the notes by year in the left sidebar of the Notes app.