Last active
November 7, 2022 11:05
-
-
Save FUSAKLA/3f322d478839cabef7ace64a4be9177a to your computer and use it in GitHub Desktop.
PostHog data retention cleanup job
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
import logging
import os
from datetime import datetime, timedelta

import django

# Django must be initialised before any model module is imported, so the
# imports below intentionally come after django.setup().
django.setup()

from django.utils import timezone  # noqa: E402
from posthog.models import Event, ElementGroup  # noqa: E402
# Runtime configuration, read once from the environment when the module loads.

# Events whose timestamp is older than this many days are deleted.
max_age_days = int(os.getenv("POSTHOG_CLEANUP_OLDER_THAN_DAYS", 30))
# Maximum number of items handed to a single delete call.
step_size = int(os.getenv("POSTHOG_CLEANUP_BATCH_SIZE", 1000))
# When enabled, delete calls are skipped and only logged.
dry_run = os.getenv("POSTHOG_CLEANUP_DRY_RUN", "False").lower() in ("true", "yes", "1")

logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %H:%M:%S ')
def get_events_older_than(older_than):
    """Return the primary keys of all events older than ``older_than`` days.

    Args:
        older_than: Age threshold in days. Events whose ``timestamp`` is
            strictly earlier than "now minus ``older_than`` days" match.

    Returns:
        A flat ``values_list`` queryset yielding ``Event`` primary keys.
    """
    # Use Django's own clock instead of make_aware(datetime.now()): the naive
    # local time comes from the OS timezone, which may differ from Django's
    # configured timezone, and localising a naive value can fail for
    # ambiguous/non-existent times around DST changes.
    # NOTE(review): assumes USE_TZ is enabled so timezone.now() is aware.
    cutoff = timezone.now() - timedelta(days=older_than)
    return Event.objects.filter(timestamp__lt=cutoff).values_list('pk', flat=True)
def get_non_referenced_event_groups():
    """Return the element groups that no event points to.

    Returns:
        A queryset of ``ElementGroup`` objects whose ``hash`` does not occur
        among the ``elements_hash`` values of the existing events.
    """
    # NOTE(review): this materialises one hash per event in memory before
    # building the exclusion; on large installations that list can be big.
    referenced_hashes = list(Event.objects.all().values_list('elements_hash', flat=True))
    return ElementGroup.objects.exclude(hash__in=referenced_hashes)
def delete_items(item_type, items):
    """Delete the given rows of ``item_type`` unless dry-run mode is active.

    Args:
        item_type: Model class whose ``objects`` manager is used for deletion.
        items: Iterable of primary keys or of model instances (anything with
            a ``pk`` attribute); both forms may be mixed.

    Returns:
        None. In dry-run mode nothing is deleted and a log line is emitted.
    """
    if dry_run:
        logging.info("Skipping delete of items in dry run mode...")
        return
    # Callers pass either raw keys (values_list) or whole model instances;
    # reduce both to plain primary keys so the ``__in`` lookup always works.
    primary_keys = [getattr(item, "pk", item) for item in items]
    item_type.objects.filter(pk__in=primary_keys).delete()
def delete_items_batched(item_type, items, logging_indent=6 * " "):
    """Delete ``items`` of ``item_type`` in chunks of ``step_size``.

    Args:
        item_type: Model class passed through to ``delete_items``; its
            ``__name__`` is used in the log output.
        items: Sized, sliceable collection of the rows to delete.
        logging_indent: Prefix prepended to every log line of this function.

    Raises:
        ValueError: If the configured ``step_size`` is not positive (the
            batching loop could otherwise never make progress).
    """
    if step_size < 1:
        raise ValueError("POSTHOG_CLEANUP_BATCH_SIZE must be a positive integer, got %d" % step_size)

    # Taking the length up front fixes the amount of work before anything is
    # deleted. NOTE(review): for a Django QuerySet, len() evaluates and caches
    # the rows, so the slices below should come from that cache - confirm.
    number_of_items = len(items)
    logging.info("%sDeleting %d items of type %s using batches of %d size:", logging_indent, number_of_items,
                 item_type.__name__, step_size)

    for batch_start in range(0, number_of_items, step_size):
        batch_end = batch_start + step_size
        delete_items(item_type, items[batch_start:batch_end])
        # Report the share of items already processed; the last batch is
        # covered by the final 100% line below.
        if batch_end < number_of_items:
            logging.info("%s %d%%", logging_indent, int(batch_end / number_of_items * 100))
    logging.info("%s 100%%", logging_indent)
# Script entry point: first remove old events, then remove the element groups
# that are no longer referenced by any event, and log the total run time.
if __name__ == "__main__":
    logging.info("Running cleanup of PostHog...")
    start_time = datetime.now()

    logging.info(" - Deleting all events older than %d days:", max_age_days)
    delete_items_batched(Event, get_events_older_than(max_age_days))

    # Runs after the event cleanup so groups orphaned by it are picked up too.
    logging.info(" - Deleting all elements and element groups not referenced by any event anymore:")
    delete_items_batched(ElementGroup, get_non_referenced_event_groups())

    logging.info("Cleanup finished, total duration: %s", datetime.now() - start_time)
Hi, the snippet is actually quite old, so definitely there might be some compatibility issues.
I wrote it on Sep 3, 2020 for the release that was current at the time; I'm not sure exactly which version that was.
But I have it still running with latest 1.26.0 version and seems to be working just fine 🤔
It might be an issue with imports; try running it as `python posthog_cleanup.py` from the app directory that contains `manage.py`.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
`from posthog.models import Event, ElementGroup` throws an error because there is no `models` module in the posthog library. Can you please confirm which version of the posthog library you are using?