Skip to content

Instantly share code, notes, and snippets.

@reagle
Created December 9, 2020 15:29
Show Gist options
  • Save reagle/5bc44ba9e2f1b961d1aaca9179fb403b to your computer and use it in GitHub Desktop.
Save reagle/5bc44ba9e2f1b961d1aaca9179fb403b to your computer and use it in GitHub Desktop.
Given a FOLDER, replicate all markdown files with pelican metadata converted to pandoc YAML metadata in `new-FOLDER`
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
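# Given a FOLDER, replicate all markdown files with pelican metadata
# converted to pandoc YAML metadata in `new-FOLDER`.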
# (c) Copyright 2020 by Joseph Reagle
# Licensed under the GPLv3, see <http://www.gnu.org/licenses/gpl-3.0.html>
#
import argparse # http://docs.python.org/dev/library/argparse.html
import codecs
import logging
import os
import sys
from pathlib import Path, PurePath
import yaml
# The current user's home directory ("~" expanded), as a plain string.
# NOTE(review): not referenced anywhere else in the visible code.
HOME = str(Path("~").expanduser())
# Short aliases for the module-level `logging` functions so the rest of the
# script can call e.g. `info(...)` directly.
exception = logging.exception
critical = logging.critical
error = logging.error
warning = logging.warning
info = logging.info
debug = logging.debug
# Value stored under the "author" metadata key when a file does not set one.
AUTHOR = "Joseph Reagle" # default author if not provided
def main(argv=None):
    """Parse command-line arguments and configure logging.

    Args:
        argv: Argument strings without the program name. When ``None``,
            argparse falls back to ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace`` with ``folder`` (a one-element
        list, because the positional uses ``nargs=1``), ``short``,
        ``log_to_file`` and ``verbose``.
    """
    arg_parser = argparse.ArgumentParser(
        description="Given a FOLDER, replicate all markdown files with pelican metadata converted to pandoc YAML metadata in `new-FOLDER`"
    )

    # positional arguments
    arg_parser.add_argument("folder", nargs=1, metavar="FOLDER")

    # optional arguments
    arg_parser.add_argument(
        "-s",
        "--short",
        action="store_true",
        default=False,
        help="create short entries of metadata only (for testing)",
    )
    arg_parser.add_argument(
        "-L",
        "--log-to-file",
        action="store_true",
        default=False,
        help="log to file %(prog)s.log",
    )
    arg_parser.add_argument(
        "-V",
        "--verbose",
        action="count",
        default=0,
        help="increase verbosity (specify multiple times for more)",
    )
    arg_parser.add_argument("--version", action="version", version="0.5")
    args = arg_parser.parse_args(argv)

    # Each -V lowers the threshold one step: ERROR (default) -> WARNING ->
    # INFO -> DEBUG; anything beyond three -V flags stays at DEBUG.
    levels = (logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG)
    log_level = levels[min(args.verbose, len(levels) - 1)]

    log_format = "%(levelname).3s %(funcName).5s: %(message)s"
    if args.log_to_file:
        print("logging to file")
        # The log file is named after this script and is truncated on
        # every run (filemode="w").
        logging.basicConfig(
            filename=f"{PurePath(__file__).name}.log",
            filemode="w",
            level=log_level,
            format=log_format,
        )
    else:
        logging.basicConfig(level=log_level, format=log_format)

    return args
def _split_pelican_metadata(lines):
    """Separate leading pelican ``key: value`` lines from the body.

    Scanning stops at the first line that contains no colon. Keys are
    stripped and lower-cased, values are stripped, and lines whose value is
    empty are skipped without ending the scan.

    Args:
        lines: The file content as a list of lines (no line terminators).

    Returns:
        A ``(metadata, body_start)`` tuple: the collected dict and the
        index of the first body line. ``body_start`` is ``len(lines)`` when
        every line was consumed as metadata, so the body slice is empty
        rather than repeating the last metadata line.
    """
    metadata = {}
    for index, line in enumerate(lines):
        debug(f"{index=}{line=}")
        if ":" not in line:
            return metadata, index
        key, value = line.split(":", 1)
        if value.strip():
            metadata[key.strip().lower()] = value.strip()
    return metadata, len(lines)


def process_folder(args):
    """Convert every markdown file under ``args.folder[0]``.

    Walks the folder recursively. For each ``.md``/``.markdown`` file the
    leading pelican metadata is rewritten as a pandoc YAML block
    (``---`` ... ``...``) and the result is written to the same relative
    location under a ``new-``-prefixed path. Other files are skipped and
    reported through ``error``.

    Args:
        args: Parsed arguments; reads ``args.folder`` (one-element list)
            and ``args.short`` (write the metadata block only).
    """
    info(f"{args.folder[0]=}")
    for path, _dirs, filenames in os.walk(args.folder[0]):
        for filename in filenames:
            # BUG?: would there be non-markdown files to replicate?
            # BUG?: does pelican support file extensions beyond ".md"?
            if not filename.endswith((".md", ".markdown")):
                error(f"skipping {filename=}")
                continue

            filename_full = f"{path}/{filename}"
            # NOTE(review): the prefix is applied to the whole walked path,
            # so a nested or absolute FOLDER ("a/b", "/x/y") ends up under
            # "new-a/b" or "new-/x/y" -- confirm this is intended.
            path_new = f"new-{path}"
            filename_new_full = f"{path_new}/{filename}"
            info(f"processing {filename_full=}")

            # "utf-8-sig" decodes UTF-8 regardless of locale and drops a
            # leading BOM if present; an empty file simply yields "".
            with open(filename_full, "r", encoding="utf-8-sig") as fn:
                content = fn.read().split("\n")
            debug(f"{content=}")

            metadata, body_start = _split_pelican_metadata(content)
            metadata.setdefault("author", AUTHOR)

            front_matter = f"---\n{yaml.dump(metadata)}...\n"
            info(front_matter)
            content_new = [front_matter]
            if not args.short:
                debug("".join(content[body_start:]))
                content_new.append("\n".join(content[body_start:]))

            info(f"{path_new=}")
            os.makedirs(path_new, exist_ok=True)
            with open(filename_new_full, "w", encoding="utf-8") as fn_new:
                fn_new.write("".join(content_new))
if __name__ == "__main__":
    # Run as a script: parse the command line (which also sets up logging),
    # then convert the requested folder.
    args = main(argv=sys.argv[1:])
    process_folder(args=args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment