-
-
Save eggplants/df608e8ced4e4d993bfe3031f34fcdea to your computer and use it in GitHub Desktop.
Strip PDF Metadata (improved version)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Strip top-level metadata from every PDF below a directory using
# exiftool and qpdf. Run with -h for the full description.

set -euo pipefail

# ANSI color codes so that warnings/errors stick out. They are stored with a
# literal backslash, so the printing side has to expand them
# (echo -e or printf '%b').
readonly GREEN="\033[32m"
readonly RED="\033[31m"
readonly CLEAR="\033[0m"

# One-line synopsis, shown on argument errors and as the first line of the help.
readonly USAGE="usage: $0 [<TARGET DIR>=.]"
#######################################
# Print the full help text (usage line followed by the description).
# Globals:   USAGE (read)
# Arguments: none
# Outputs:   help text on stdout
#######################################
_help() {
  # The delimiter is deliberately unquoted: $USAGE and $0 are expanded, while
  # the escaped \$PATH and the "$ " prompt markers are printed literally.
  cat << HELP
$USAGE
# --------------------------------------------------------------------
# Recursively find pdfs from the directory given as the first argument,
# otherwise search the current directory.
# Use exiftool and qpdf (both must be installed and locatable on \$PATH)
# to strip all top-level metadata from PDFs.
#
# Note - This only removes file-level metadata, not any metadata
# in embedded images, etc.
#
# Code is provided as-is, I take no responsibility for its use,
# and I make no guarantee that this code works
# or makes your PDFs "safe," whatever that means to you.
#
# You may need to enable execution of this script before using,
# eg. chmod +x $0
#
# example:
# clean current directory:
# $ $0
#
# clean specific directory:
# $ $0 some/other/directory
# --------------------------------------------------------------------
HELP
}
#######################################
# Write a metadata-stripped copy of one PDF next to the original.
# The copy is named "<path without its last extension>_clean.pdf"; an
# existing regular file of that name is replaced. The input file itself is
# not modified.
# Globals:   GREEN, CLEAR (read)
# Arguments: $1 - path of the PDF to clean
# Outputs:   one "Processed" line on stdout; warnings on stderr
# Returns:   1 on a wrong argument count, the failing command's status if
#            rm or exiftool fails, 0 otherwise (a qpdf failure only warns)
#######################################
_clean_info() {
  local target cleaned rc

  if (( $# != 1 )); then
    return 1
  fi
  target="$1"

  # Neither tool is given an end-of-options marker here, so make sure a path
  # such as "-x.pdf" cannot be parsed as an option.
  if [[ "$target" == -* ]]; then
    target="./${target}"
  fi

  # output file as original filename with suffix _clean.pdf
  cleaned="${target%.*}_clean.pdf"

  # Drop a leftover from an earlier run so exiftool's -o target is free.
  if [[ -f "$cleaned" ]]; then
    rm -f -- "$cleaned" || return
  fi

  # Fail explicitly instead of relying on the caller's `set -e`: without a
  # cleaned copy there is nothing for qpdf to work on.
  exiftool -q -all:all= "$target" -o "$cleaned" || return

  # qpdf rewrites the copy in place. This stays best-effort (qpdf's manual
  # documents a non-zero exit for warnings as well), but the status is
  # reported rather than silently discarded.
  # NOTE(review): ExifTool's PDF notes say its deletions are reversible
  # until the file is rewritten, so a qpdf failure may matter - confirm.
  rc=0
  qpdf --linearize --replace-input "$cleaned" || rc=$?
  if (( rc != 0 )); then
    printf 'warn: qpdf exited with status %d for: %s\n' "$rc" "$cleaned" >&2
  fi

  # %b expands the color escapes; %s keeps backslashes in the path literal.
  printf '=> %bProcessed%b: %b%s%b\n' \
    "$GREEN" "$CLEAR" "$GREEN" "$cleaned" "$CLEAR"
}
#######################################
# Entry point: clean every *.pdf found (recursively) below a directory.
# Globals:   USAGE, RED, CLEAR (read)
# Arguments: $1 - target directory (optional or empty: "."),
#                 or -h / -help / --help to print the help text
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success or after printing help; 1 on a usage error, a
#            missing tool or a missing directory; otherwise the status of
#            the first failing _clean_info call
#######################################
main() {
  local dir tool f i total rc
  local -a pdfs=()

  if (( $# > 1 )); then
    echo "$USAGE" >&2
    return 1
  fi
  # ${1-} keeps this safe under `set -u` when called without arguments.
  # Help is handled first so it works even when the tools are missing.
  if [[ "${1-}" =~ ^(-h|-help|--help)$ ]]; then
    _help
    return 0
  fi

  # `command -v a b` succeeds as soon as ANY of the names resolves, so each
  # tool has to be checked on its own.
  for tool in exiftool qpdf; do
    if ! command -v "$tool" > /dev/null 2>&1; then
      echo 'err: Please install: exiftool, qpdf' >&2
      return 1
    fi
  done

  # use the first argument ($1), or '.' (this directory) if not given
  dir="${1:-.}"
  if [[ ! -d "$dir" ]]; then
    echo "err: Target dir '${dir}' is not found." >&2
    return 1
  fi
  # Keep find from reading a directory named like "-foo" as an expression.
  if [[ "$dir" == -* ]]; then
    dir="./${dir}"
  fi

  echo "Cleaning PDFs in directory $dir"

  # Collect the files up front, NUL-delimited, so that any file name survives
  # intact and the total is known before processing starts. No temporary
  # file is needed. Note -- this finds pdfs recursively!
  while IFS= read -r -d '' f; do
    pdfs+=("$f")
  done < <(find "$dir" -type f -name '*.pdf' -print0)
  total=${#pdfs[@]}

  for (( i = 0; i < total; i++ )); do
    printf '[%d/%d]: %b%s%b\n' \
      "$((i + 1))" "$total" "$RED" "${pdfs[i]}" "$CLEAR"
    # Called as a plain command (not inside `if`/`||`) so that `set -e` stays
    # active inside _clean_info; the explicit check below covers callers
    # that run without `set -e`.
    _clean_info "${pdfs[i]}"
    rc=$?
    if (( rc != 0 )); then
      return "$rc"
    fi
  done
  return 0
}
# "${@-}" hands main one empty argument when the script is started without
# any, so that "$1" inside main is always bound under `set -u`.
main "${@-}"
exit "$?"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment