@kyle0r
Last active December 18, 2021 15:08
Integrating Nets AFCR - Automated Fraud and Chargeback Reporting
#!/bin/bash
# :set ts=2 sw=2 expandtab number autoindent|colorscheme darkblue
clear
set -u
set -o pipefail
# https://superuser.com/a/1183819
[ "systemd" == "$(ps --no-headers -o comm 1)" ] || { echo "exiting. AFCR depends on systemd for user services and timers."; exit 1; }
systemctl --user 1>/dev/null 2>&1 || { echo "exiting. issue detected with systemctl --user commands"; exit 1; }
detected_os=$(hostnamectl |grep Operating|cut -d' ' -f5-|tr '[:upper:]' '[:lower:]')
function is_debian_or_ubuntu() {
  if [[ "$detected_os" =~ ubuntu ]] || [[ "$detected_os" =~ debian ]]; then
    return 0
  fi
  return 1
}
#is_debian_or_ubuntu || { echo "exiting. unsupported os detected."; exit 1; }
[ 0 == "$UID" ] && { echo "exiting. root user detected. It is recommended to run this script with a non-root user."; exit 1; }
# https://stackoverflow.com/a/35412000
function is_variable_set() {
declare -p "$1" &>/dev/null
}
# systemd service directives ExecStart and WorkingDirectory do not permit dynamic paths.
# Therefore implementation logic expects there to be a constant path $HOME/nets-afcr-s3/data.
# Therefore symlinks are used to provide systemd with constant paths.
# an alt would be to find+replace on the .service during install, more complex?
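# illustrative sketch (these example paths are hypothetical; the defaults differ):
#   INSTALL_PATH=/opt/nets-afcr-s3  DATA_PATH=/var/lib/nets-afcr-s3
#   => $HOME/nets-afcr-s3-systemd-path -> /opt/nets-afcr-s3
#   => /opt/nets-afcr-s3/data          -> /var/lib/nets-afcr-s3
# the .service unit only ever references %h/nets-afcr-s3-systemd-path/... paths.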
SYSTEMD_AFCR_INSTALL_PATH="${HOME}/nets-afcr-s3-systemd-path"
INSTALL_PATH="${INSTALL_PATH:-${HOME}/nets-afcr-s3}"
DEFAULT_DATA_PATH=/dev/shm/nets-afcr-s3
DATA_PATH="${DATA_PATH:-${DEFAULT_DATA_PATH}}"
# remove any single trailing slash
INSTALL_PATH=${INSTALL_PATH%/}
DATA_PATH=${DATA_PATH%/}
mkdir -p "$INSTALL_PATH"
[ -d "$INSTALL_PATH" ] || { echo "exiting. install path not a dir."; exit 1; }
[ -h "${INSTALL_PATH}/data" ] && rm "${INSTALL_PATH}/data" 2>/dev/null
mkdir -p "$DATA_PATH"
[ -d "$DATA_PATH" ] || { echo "exiting. data path not found."; exit 1; }
# create symlink if DATA_PATH is not the same as the symlink path.
[ "${DATA_PATH}" != "${INSTALL_PATH}/data" ] && ln -s "${DATA_PATH}" "$INSTALL_PATH/data" 2>/dev/null
[ -e "$INSTALL_PATH/data" ] || { echo "exiting. $INSTALL_PATH/data does not exist."; exit 1; }
# create/update the systemd constant path
ln -Tfs "$INSTALL_PATH" "$SYSTEMD_AFCR_INSTALL_PATH" 2>/dev/null
# generic pause
function generic_pause() {
read -rn1 -p 'Press CTRL+C to abort OR press any key to continue...'
printf -- "\\n\\n"
}
dpkg_q=$(command -v dpkg-query)
apt=$(command -v apt)
dnf=$(command -v dnf)
yum=$(command -v yum)
if [ -x "$dnf" ]; then
pkg_mgr="$dnf"
elif [ -x "$yum" ]; then
pkg_mgr="$yum"
elif [ -x "$apt" ]; then
pkg_mgr="$apt"
elif false; then
: # e.g. pacman for arch
else
echo "exiting. unable to determine the system package manager."
exit 1
fi
cat <<EOF
============================================
Welcome to the AFCR data consumer installer.
============================================
Docs: https://coda.io/@ff0/afcr-automated-fraud-and-chargeback-reporting
This script is designed to be re-run as many times as needed.
It also acts like an updater; latest versions will be installed.
The latest file(s) will be downloaded from the s3 bucket to DATA_PATH.
Existing files will be downloaded/overwritten if size or modified time differ.
INSTALL_PATH=$INSTALL_PATH
DATA_PATH=$DATA_PATH
NOTE: These env vars can be set prior to installer launch.
EOF
if [ "$DEFAULT_DATA_PATH" == "$DATA_PATH" ]; then
cat <<EOF
ATTENTION: the DATA_PATH default is non-persistent shared memory.
VOLATILE: files in memory will not survive a reboot.
INFOSEC: This is a security-by-design best practice.
EOF
fi
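# illustrative note: to keep downloaded files across reboots, export a persistent
# DATA_PATH (e.g. the hypothetical DATA_PATH="$HOME/nets-afcr-data") before
# launching this installer; the /dev/shm default is volatile by design.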
cat <<EOF
This installer aims to be as lightweight and maintainable as possible.
It does NOT use the official aws cli util because we only need simple s3
capabilities; the aws cli is overkill at >120MB.
This installer does use the official aws boto3 python module.
This installer performs the following:
0) check for prerequisites and provide feedback on detected issues.
1) install prerequisites like python3, python3-pip and python3-venv.
2) set up a python virtual env in INSTALL_PATH for an isolated and
maintainable install.
read more about python venv here: https://docs.python.org/3/library/venv.html
3) prompt for your AWS S3 bucket details and credentials.
4) download the latest s3-helper.py from GitHub.
5) download the latest data from your bucket to the DATA_PATH (not a full sync).
6) install systemd --user services and timers for scheduled download of the
latest data.
EOF
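# rough sketch of the resulting INSTALL_PATH layout (for orientation only):
#   $INSTALL_PATH/venv/         python virtual env (pip + boto3 installed below)
#   $INSTALL_PATH/s3-helper.py  the downloader script fetched from this gist
#   $INSTALL_PATH/data          symlink to $DATA_PATH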
generic_pause
clear
function is_installed() {
  if [ -x "$dpkg_q" ]; then
    # shellcheck disable=SC2016
    if "$dpkg_q" -W --showformat='${db:Status-Status}' "$1" 2>/dev/null | grep --quiet '^installed$'; then
      return 0
    fi
  elif [ -x "$dnf" ] || [ -x "$yum" ]; then
    if "$pkg_mgr" --quiet list installed "$1" 1>/dev/null 2>&1; then
      return 0
    fi
  else
    echo "exiting. cannot determine how to check for installed packages. cannot continue."; exit 1
  fi
  return 1
}
function check_sudo_is_installed() {
sudo=$(command -v sudo)
[ -x "$sudo" ] || { echo "exiting. sudo not found"; exit 1; }
$sudo uptime 1>/dev/null 2>&1 || { echo "exiting. sudo rights issue detected."; exit 1; }
}
function install_prerequisites() {
  #set -x
  if is_variable_set pkg_to_install && [ "${#pkg_to_install[@]}" -gt 0 ]; then
    check_sudo_is_installed
    if [ -x "$apt" ] || [ -x "$yum" ] || [ -x "$dnf" ]; then
      if ! "$sudo" "$pkg_mgr" install "${pkg_to_install[@]}"; then
        echo "exiting. something went wrong with installing prerequisite packages."
        printf "package list: %s\\n" "${pkg_to_install[*]}"
        exit 1
      fi
    elif false; then
      :
    else
      echo "exiting. cannot determine how to install packages. cannot continue."
      exit 1
    fi
  fi # end if some pkg to install
  #set +x
}
curl=$(command -v curl)
if ! [ -x "$curl" ]; then
if ! is_installed curl; then
pkg_to_install+=( curl )
fi
fi
if ! is_installed python3 ; then
pkg_to_install+=( python3 python3-pip )
elif ! is_installed python3-pip ; then
pkg_to_install+=( python3-pip )
fi
if is_debian_or_ubuntu; then
is_installed python3-venv || pkg_to_install+=( python3-venv )
fi
install_prerequisites
### # python3-venv exist in repo but not-installed.
### if ! is_installed python3-venv; then
###
### "$sudo" "$apt" install python3-pip python3-venv || { echo "exiting. something went wrong with installing python3 and python3-venv."; exit 1;}
###
#### # python3-virtualenv is not an available package, so fallback to pip
#### elif is_not_installed python3-virtualenv; then
####
#### "$sudo" "$apt" install python3-pip python3-virtualenv || { echo "exiting. something went wrong with installing python3 and python3-pip."; exit 1;}
####
### else
### echo "exiting. cannot determine prerequisite install strategy."
### fi
curl=$(command -v curl); [ -x "$curl" ] || { echo "exiting. curl not found. please install it."; exit 1; }
python=$(command -v python3); [ -x "$python" ] || { echo "exiting. python3 not found. cannot continue."; exit 1; }
"$python" -m pip help 1>/dev/null 2>&1 || { echo "exiting. something went wrong checking if python3-pip is available."; exit 1; }
"$python" -m venv -h 1>/dev/null 2>&1 || { echo "exiting. something went wrong checking if python3-venv is available."; exit 1; }
"$python" -m venv "${INSTALL_PATH}/venv" || { echo "exiting. something went wrong creating python venv."; exit 1; }
# shellcheck source=/dev/null
source "${INSTALL_PATH}/venv/bin/activate"
# now inside the venv
# execute individually in case tmp space is small/low
python3 -m pip install --upgrade pip || { echo "exiting. something went wrong installing/updating python pip."; exit 1; }
python3 -m pip install --upgrade boto3 || { echo "exiting. something went wrong installing/updating python package boto3."; exit 1; }
printf "\\nPlease enter the bucket info you have been provided:\\n\\n"
set +u
while [ -z "$BUCKET" ]; do read -rp "enter aws bucket name: " BUCKET; export BUCKET; echo; echo "value: $BUCKET"; done
while [ -z "$AWS_DEFAULT_REGION" ]; do read -rp "enter aws bucket region: " AWS_DEFAULT_REGION; export AWS_DEFAULT_REGION; echo; echo "value: $AWS_DEFAULT_REGION"; done
printf "\\nPlease copy/paste the aws credentails you have been provided:\\ninput will not be echoed.\\n\\n"
while [ -z "$AWS_ACCESS_KEY_ID" ]; do read -rsp "(no echo) enter aws access key id: " AWS_ACCESS_KEY_ID; export AWS_ACCESS_KEY_ID; printf '\nread bytes: %s\n' "$(echo -n "$AWS_ACCESS_KEY_ID"|wc -c)"; done
while [ -z "$AWS_SECRET_ACCESS_KEY" ]; do read -rsp "(no echo) enter aws secret access key: " AWS_SECRET_ACCESS_KEY; export AWS_SECRET_ACCESS_KEY; printf '\nread bytes: %s\n' "$(echo -n "$AWS_SECRET_ACCESS_KEY"|wc -c)"; done
set -u
curl --silent --show-error -L https://gist.githubusercontent.com/kyle0r/a6e29d2abf3da23e4c113c7897e5c351/raw/s3-helper.py > "${INSTALL_PATH}"/s3-helper.py
chmod +x "${INSTALL_PATH}/s3-helper.py"
[ -x "${INSTALL_PATH}/s3-helper.py" ] || { echo "exiting. s3-helper.ph not found or not executable."; exit 1; }
cd "$DATA_PATH" || { echo "exiting. could not cd to $DATA_PATH.."; exit 1; }
cat <<EOF
The base install is completed.
INSTALL_PATH=$INSTALL_PATH
DATA_PATH=$DATA_PATH
Now the systemd --user services and timers will be installed.
EOF
generic_pause
#set -x
# mkdir for systemd user service
mkdir -p ~/.config/systemd/user ~/.config/systemd/user/nets-afcr.service.d
# install systemd --user service
curl --silent --show-error -L https://gist.githubusercontent.com/kyle0r/a6e29d2abf3da23e4c113c7897e5c351/raw/nets-afcr.service > ~/.config/systemd/user/nets-afcr.service
cat <<EOF > ~/.config/systemd/user/nets-afcr.service.d/override.conf
[Service]
Environment=BUCKET=${BUCKET}
Environment=AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}
Environment=AWS_ACCESS_KEY_ID='${AWS_ACCESS_KEY_ID}'
Environment=AWS_SECRET_ACCESS_KEY='${AWS_SECRET_ACCESS_KEY}'
EOF
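# note: the bucket credentials live only in this --user drop-in; to rotate them
# later, edit ~/.config/systemd/user/nets-afcr.service.d/override.conf (or simply
# re-run this installer) and then run: systemctl --user daemon-reload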
# reload --user systemd
systemctl --user daemon-reload
clear
cat <<EOF
systemd --user service install completed.
The service will now be started; pay attention to any errors.
The following journal entry means the install and start were successful:
"Started AFCR service." or "Finished AFCR service." or similar.
EOF
generic_pause
# do a one-time functional check service start, and follow/tail the journal to verify service starts OK.
systemctl --user start nets-afcr.service ; journalctl --no-pager --user-unit nets-afcr.service
printf "\\n\\n%s\\n\\n" "if the install and service start was successful please continue."
generic_pause
# install systemd --user timer
curl --silent --show-error -L https://gist.githubusercontent.com/kyle0r/a6e29d2abf3da23e4c113c7897e5c351/raw/nets-afcr.timer > ~/.config/systemd/user/nets-afcr.timer
# reload --user systemd
systemctl --user daemon-reload
# enable the timer, so it will survive reboots
systemctl --user enable nets-afcr.timer
# start the timer
systemctl --user start nets-afcr.timer
# check the timer is configured as expected
systemctl --user --all list-timers
cat <<EOF
the systemd --user timer is now installed.
You should see it listed above. You may wish to adjust the timer schedule.
$ systemctl --user edit --full nets-afcr.timer
This script will now exit.
Tip: at your convenience reboot the system to verify the timer is active and configured as expected.
To check timers:
$ systemctl --all --user list-timers
EOF
[Unit]
Description=AFCR service
[Service]
Type=oneshot
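# BUCKET, AWS_DEFAULT_REGION, AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are
# injected by the installer via the drop-in
# ~/.config/systemd/user/nets-afcr.service.d/override.conf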
WorkingDirectory=-%h/nets-afcr-s3-systemd-path/data
ExecStartPre=/bin/mkdir -p /dev/shm/nets-afcr-s3
ExecStart=%h/nets-afcr-s3-systemd-path/venv/bin/python3 %h/nets-afcr-s3-systemd-path/s3-helper.py --log-level=WARNING --region=${AWS_DEFAULT_REGION} --bucket=${BUCKET} --command=latest --prefix=afcr/fraud/daily
ExecStart=%h/nets-afcr-s3-systemd-path/venv/bin/python3 %h/nets-afcr-s3-systemd-path/s3-helper.py --log-level=WARNING --region=${AWS_DEFAULT_REGION} --bucket=${BUCKET} --command=latest --prefix=afcr/fraud/monthly
ExecStart=%h/nets-afcr-s3-systemd-path/venv/bin/python3 %h/nets-afcr-s3-systemd-path/s3-helper.py --log-level=WARNING --region=${AWS_DEFAULT_REGION} --bucket=${BUCKET} --command=latest --prefix=afcr/cbk/daily
ExecStart=%h/nets-afcr-s3-systemd-path/venv/bin/python3 %h/nets-afcr-s3-systemd-path/s3-helper.py --log-level=WARNING --region=${AWS_DEFAULT_REGION} --bucket=${BUCKET} --command=latest --prefix=afcr/cbk/monthly
[Install]
WantedBy=default.target
[Unit]
Description=AFCR timer
Requires=nets-afcr.service
[Timer]
Unit=nets-afcr.service
# daily
OnCalendar=*-*-* 03:00:00 UTC
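# verify the schedule and next elapse with: systemctl --user list-timers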
[Install]
WantedBy=timers.target
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""python productivity tool for downloading files from s3 buckets"""
"""
vim settings suggestion:
:set ts=2 sw=2 expandtab number autoindent|colorscheme darkblue
"""
import os, sys
import boto3
import argparse
import logging
ENV = os.getenv
ARGS = None
ARGV = sys.argv.copy()
SCRIPT_NAME = os.path.basename(ARGV.pop(0))
COMMANDS = [\
('latest', 'download_latest_object')\
,('sync', 'sync_prefix')\
]
LOGLEVELS = [\
('DEBUG', logging.DEBUG), ('INFO', logging.INFO), \
('WARNING', logging.WARNING), ('ERROR', logging.ERROR), \
('CRITICAL', logging.CRITICAL) \
]
FORMAT = '%(levelname)s\t%(asctime)-15s\t%(message)s'
logger = logging.getLogger(SCRIPT_NAME)
# set the root level to highest mode, and control the levels
# with the specific handlers. The root controls the MAX logging
# level, so if it's set to WARNING, then WARNING is the MAX level.
# https://stackoverflow.com/q/17668633
logger.setLevel(logging.DEBUG)
LOG_FORMATTER = logging.Formatter(FORMAT)
# create console handler
ch = logging.StreamHandler()
ch.setLevel(logging.WARNING)
ch.setFormatter(LOG_FORMATTER)
logger.addHandler(ch)
# simple var to track current log level
currentLogLevel = logging.WARNING
def download_latest_object():
  """
  cite: https://stackoverflow.com/a/53423319
  """
  logger.info('attempting to download the latest file from s3://%s/%s' % (ARGS.bucket, ARGS.prefix) )
  get_last_modified = lambda obj: int(obj['LastModified'].strftime('%s'))
  s3 = boto3.client('s3', region_name=ARGS.region)
  paginator = s3.get_paginator( "list_objects_v2" )
  # Delimiter='/' prevents recursion
  # doc: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Paginator.ListObjectsV2.paginate
  page_iterator = paginator.paginate(Bucket=ARGS.bucket, Delimiter='/', Prefix=ARGS.prefix)
  for page in page_iterator:
    if "Contents" in page:
      latest_obj = [obj for obj in sorted( page["Contents"], key=get_last_modified)][-1]
  if not 'latest_obj' in locals() or latest_obj['Key'].endswith('/') or os.path.isdir(latest_obj['Key']):
    logger.warning('exiting. no object(s) detected at prefix s3://%s/%s' % (ARGS.bucket, ARGS.prefix) )
    return
  logger.info('attempting to download latest detected key: %s LastModified: %s' % (latest_obj['Key'], latest_obj['LastModified']))
  download_object_if_needed(latest_obj, s3)
def download_object(obj, s3):
  """
  download an s3 object, making dirs as required.
  a managed transfer which will perform a multipart download in multiple threads if necessary.
  ref: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.download_fileobj
  """
  dirname = os.path.dirname(obj['Key'])
  path_exists = os.path.exists(dirname)
  if not '' == dirname and not path_exists and not os.path.isdir(dirname):
    os.makedirs(dirname)
  with open(obj['Key'], 'wb') as f:
    logger.info('writing local key: %s' % obj['Key'])
    s3.download_fileobj(ARGS.bucket, obj['Key'], f)
  t = int(obj['LastModified'].strftime('%s'))
  os.utime( obj['Key'], times=(t, t) )
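# note: download_object mirrors the S3 LastModified timestamp onto the local file
# via os.utime above, which is what lets the mtime+size comparison below skip
# objects that are already up to date.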
def download_object_if_needed(obj, s3):
  s3_key = obj['Key']
  local_exists = os.path.exists(s3_key)
  s3_mtime = int(obj['LastModified'].strftime('%s'))
  s3_size = int(obj['Size'])
  logger.info('key: %s, exists local: %s, s3 timestamp: %s, s3 size: %i' % ( s3_key, local_exists, s3_mtime, s3_size))
  if local_exists:
    local_mtime = int(os.path.getmtime(s3_key))
    local_size = int(os.path.getsize(s3_key))
    logger.info('local key: %s, local mtime: %i, local size: %i' % ( s3_key, local_mtime, local_size))
    if local_mtime == s3_mtime and local_size == s3_size:
      logger.info('local key: %s already exists with identical timestamp and size, skipping.' % s3_key)
    else:
      download_object(obj, s3)
  else:
    download_object(obj, s3)
def sync_prefix():
  """
  boto3 does not have an s3 sync method like the aws cli.
  here is a simple rsync-like implementation.
  ref: https://github.com/boto/boto3/issues/358
  logic: does s3 and local modified time and size differ? yes=overwrite no=skip.
  empty remote prefixes are not created locally.
  it would be possible to update the logic to also compare checksums
  ref: https://zihao.me/post/calculating-etag-for-aws-s3-objects/
  """
  logger.info('attempting to sync src s3://%s/%s to local dst' % (ARGS.bucket, ARGS.prefix) )
  s3 = boto3.client('s3', region_name=ARGS.region)
  paginator = s3.get_paginator( "list_objects_v2" )
  page_iterator = paginator.paginate(Bucket=ARGS.bucket, Delimiter='', Prefix=ARGS.prefix)
  for page in page_iterator:
    if "Contents" in page:
      for obj in page["Contents"]:
        if not obj['Key'].endswith('/'):
          download_object_if_needed(obj, s3)
def setLogLevel(newLevel, log_object):
  for level in LOGLEVELS:
    lvl, val = level
    if newLevel == lvl:
      log_object.setLevel(val)
      globals()['currentLogLevel'] = val
      return
def main():
  parser = argparse.ArgumentParser(\
    description='''\
python productivity tool for downloading files from s3 buckets.
files are downloaded to the current working dir.
any relative prefix paths will be created automatically.
multipart objects are natively supported.
logic: does s3 and local modified time and size differ? yes=overwrite no=skip.
the "latest" command is non-recursive.
the "sync" command is recursive.
both support --prefix.
I suggest using env vars or cfg files to set the required sensitive config vars.
The script requires at least AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY or an equivalent cfg file.
cite: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#using-environment-variables
cite: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#using-a-configuration-file
'''
    ,epilog='''Author: Kyle M <[email protected]>'''
    ,formatter_class=argparse.RawDescriptionHelpFormatter)
  parser.add_argument('--log-level', metavar='<level>', dest='log_level', action='store',
    help='modify the console log level.', default='WARNING',
    choices=[key for key, val in LOGLEVELS])
  parser.add_argument('--command', metavar='<command>', dest='command', action='store',
    help='the command to execute.', required=True,
    choices=[key for key, val in COMMANDS])
  parser.add_argument('--bucket', metavar='<bucket>', dest='bucket', action='store',
    help='the source s3 bucket.', required=True)
  parser.add_argument('--region', metavar='<region>', dest='region', action='store',
    help='the source s3 bucket region.', required=True)
  parser.add_argument('--prefix', metavar='<prefix>', dest='prefix', action='store',
    help='bucket key prefix.', required=False, default='')
  global ARGS
  ARGS = parser.parse_args(ARGV)
  if ARGS.log_level:
    setLogLevel(ARGS.log_level, ch)
  if ARGS.prefix.startswith('/'):
    ARGS.prefix = ARGS.prefix[1:]
  elif ARGS.prefix and not ARGS.prefix.endswith('/'):
    ARGS.prefix += '/'
  for cmd in COMMANDS:
    key, val = cmd
    if ARGS.command == key:
      globals()[val]()
      break
  logger.info('"%s" command completed. exiting.' % ARGS.command)
if __name__ == "__main__":
  main()