Created
December 9, 2020 15:29
-
-
Save reagle/5bc44ba9e2f1b961d1aaca9179fb403b to your computer and use it in GitHub Desktop.
Given a FOLDER, replicate all markdown files with pelican metadata converted to pandoc YAML metadata in `new-FOLDER`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
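# Given a FOLDER, replicate all markdown files with pelican metadata
# converted to pandoc YAML metadata in `new-FOLDER`.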
# (c) Copyright 2020 by Joseph Reagle | |
# Licensed under the GPLv3, see <http://www.gnu.org/licenses/gpl-3.0.html> | |
# | |
import argparse # http://docs.python.org/dev/library/argparse.html | |
import codecs | |
import logging | |
import os | |
import sys | |
from pathlib import Path, PurePath | |
import yaml | |
# Home directory of the current user, as a plain string.
HOME = str(Path("~").expanduser())

# Default author written to a file's metadata when it does not name one.
AUTHOR = "Joseph Reagle"

# Short module-level aliases for the root-logger helpers,
# listed from least to most severe.
debug = logging.debug
info = logging.info
warning = logging.warning
error = logging.error
critical = logging.critical
exception = logging.exception
def main(argv):
    """Parse the command line and configure logging.

    argv is the list of argument strings, without the program name.
    Returns the argparse namespace; `folder` is a one-element list
    because the positional is declared with nargs=1.
    """
    parser = argparse.ArgumentParser(
        description="Given a FOLDER, replicate all markdown files with pelican metadata converted to pandoc YAML metadata in `new-FOLDER`"
    )

    # positional arguments
    parser.add_argument("folder", nargs=1, metavar="FOLDER")

    # optional arguments
    parser.add_argument(
        "-s",
        "--short",
        action="store_true",
        default=False,
        help="create short entries of metadata only (for testing)",
    )
    parser.add_argument(
        "-L",
        "--log-to-file",
        action="store_true",
        default=False,
        help="log to file %(prog)s.log",
    )
    parser.add_argument(
        "-V",
        "--verbose",
        action="count",
        default=0,
        help="increase verbosity (specify multiple times for more)",
    )
    parser.add_argument("--version", action="version", version="0.5")
    args = parser.parse_args(argv)

    # Number of -V flags -> level: 0 ERROR, 1 WARNING, 2 INFO, 3 or more DEBUG.
    levels = (logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG)
    log_level = levels[min(args.verbose, len(levels) - 1)]

    log_format = "%(levelname).3s %(funcName).5s: %(message)s"
    logging_options = {"level": log_level, "format": log_format}
    if args.log_to_file:
        print("logging to file")
        # The log file is named after this script and truncated on each run.
        logging_options["filename"] = f"{PurePath(__file__).name}.log"
        logging_options["filemode"] = "w"
    logging.basicConfig(**logging_options)

    return args
def _split_pelican_metadata(lines):
    """Return (metadata, body_start) for the lines of a pelican markdown file.

    Leading lines of the form `key: value` are collected into a dict with
    lower-cased keys; lines with a colon but an empty value are skipped.
    body_start is the index of the first line without a colon, or
    len(lines) when every line is a metadata line.
    """
    metadata = {}
    # If the loop never hits a non-metadata line there is no body at all,
    # so the body slice must come out empty rather than repeat the last line.
    body_start = len(lines)
    for index, line in enumerate(lines):
        debug(f"{index=}{line=}")
        if ":" not in line:
            body_start = index
            break
        key, value = line.split(":", 1)
        if value.strip():
            metadata[key.strip().lower()] = value.strip()
    return metadata, body_start


def _mirror_directory(folder, path):
    """Map a directory found under `folder` to its twin under `new-FOLDER`.

    Only the final component of `folder` receives the `new-` prefix, so
    `./blog`, `../blog` and `/abs/blog` map to a sibling `new-blog`.
    """
    root = Path(folder)
    if not root.name:
        # "." and "/" have no final component to rename; keep the plain
        # string prefixing for them.
        return Path(f"new-{path}")
    return root.with_name(f"new-{root.name}") / Path(path).relative_to(root)


def process_folder(args):
    """Convert every markdown file under args.folder[0] into `new-FOLDER`.

    For each `.md`/`.markdown` file the leading pelican `key: value` lines
    are rewritten as a pandoc YAML block (`---` ... `...`), with AUTHOR
    filled in when no author is given. Unless args.short is set, the rest
    of the file follows unchanged. Other files are skipped and logged.
    Files are read and written as UTF-8.
    """
    folder = args.folder[0]
    info(f"{args.folder[0]=}")
    for path, _dirs, filenames in os.walk(folder):
        for filename in filenames:
            # BUG?: would there be non-markdown files to replicate?
            # BUG?: does pelican support file extensions beyond ".md"?
            if not filename.endswith((".md", ".markdown")):
                error(f"skipping {filename=}")
                continue

            filename_full = os.path.join(path, filename)
            path_new = str(_mirror_directory(folder, path))
            filename_new_full = os.path.join(path_new, filename)
            info(f"processing {filename_full=}")

            # "utf-8-sig" decodes UTF-8 and drops a leading byte-order mark
            # when present; it also copes with an empty file.
            with open(filename_full, "r", encoding="utf-8-sig") as fn:
                content = fn.read().split("\n")
            debug(f"{content=}")

            metadata, body_start = _split_pelican_metadata(content)
            if "author" not in metadata:
                metadata["author"] = AUTHOR

            front_matter = f"---\n{yaml.dump(metadata)}...\n"
            info(front_matter)
            content_new = [front_matter]
            if not args.short:
                debug("".join(content[body_start:]))
                content_new.append("\n".join(content[body_start:]))

            info(f"{path_new=}")
            os.makedirs(path_new, exist_ok=True)
            with open(filename_new_full, "w", encoding="utf-8") as fn_new:
                fn_new.write("".join(content_new))
if __name__ == "__main__":
    # main() parses the command line and configures logging; the resulting
    # namespace then drives the conversion.
    args = main(sys.argv[1:])
    process_folder(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment