Skip to content

Instantly share code, notes, and snippets.

@alashow
Created March 12, 2023 00:39
Show Gist options
  • Save alashow/615a7a8647af2bd4758dcc9e471e5c52 to your computer and use it in GitHub Desktop.
Save alashow/615a7a8647af2bd4758dcc9e471e5c52 to your computer and use it in GitHub Desktop.
Get a diff of plex database media items by checking guids
import sqlite3
from tqdm import tqdm
from multiprocessing import cpu_count, Pool
def get_db(db_path):
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
return cursor
def is_relevant_section_type(item):
guid, title = item
section_types = ['movie', 'show', 'season', 'episode']
try:
if guid.split('/')[2] not in section_types:
return False, item
return True, item
except:
return False, item
def get_media_items(cursor, section_ids=[], filterOutIrrelevantSections=False):
query = "SELECT guid, title FROM metadata_items"
if section_ids:
query += " WHERE library_section_id IN ({})".format(','.join([str(section_id) for section_id in section_ids]))
query += " GROUP BY guid ORDER BY guid ASC"
cursor.execute(query)
items = cursor.fetchall()
if filterOutIrrelevantSections:
pool = Pool(cpu_count())
# filter out not needed item types
for result, item in tqdm(pool.imap_unordered(is_relevant_section_type, items), total=len(items)):
if result is False:
items.remove(item)
return items
def get_media_files(cursor, guids=[]):
query = """
SELECT media_parts.file, media_parts.size
FROM media_parts
INNER JOIN media_items ON media_items.id = media_parts.media_item_id
INNER JOIN metadata_items ON metadata_items.id = media_items.metadata_item_id
WHERE metadata_items.guid IN ({})
""".format(','.join(["'{}'".format(guid) for guid in guids]))
cursor.execute(query)
return cursor.fetchall()
def get_media_parts_size_in_terabytes(media_parts):
size = sum([media_part[1] or 0 for media_part in media_parts]) / 1024 / 1024 / 1024 / 1024
return f"{size:.2f} TB"
if __name__ == '__main__':
database_a_filename = "plex-avengers.db"
database_b_filename = "plex-nandor.db"
database_a = get_db(database_a_filename)
database_b = get_db(database_b_filename)
non_indian_section_ids = [1, 6, 7, 8, 9, 10, 11, 14, 17, 18, 20, 21, 22, 23]
media_items_a = get_media_items(database_a, section_ids=non_indian_section_ids)
media_items_b = get_media_items(database_b)
# get differences & common items
media_items_a_guids = set([media_item[0] for media_item in media_items_a])
media_items_b_guids = set([media_item[0] for media_item in media_items_b])
media_items_a_guids_not_in_b = media_items_a_guids - media_items_b_guids
media_items_b_guids_not_in_a = media_items_b_guids - media_items_a_guids
media_items_common_to_a_and_b = media_items_a_guids & media_items_b_guids
media_parts_a = get_media_files(database_a, guids=media_items_a_guids)
media_parts_b = get_media_files(database_b, guids=media_items_b_guids)
media_parts_a_not_in_b = get_media_files(database_a, guids=media_items_a_guids_not_in_b)
media_parts_b_not_in_a = get_media_files(database_b, guids=media_items_b_guids_not_in_a)
media_parts_a_common = get_media_files(database_a, guids=media_items_common_to_a_and_b)
media_parts_b_common = get_media_files(database_b, guids=media_items_common_to_a_and_b)
media_parts_a_file_size_terabytes = get_media_parts_size_in_terabytes(media_parts_a)
media_parts_b_file_size_terabytes = get_media_parts_size_in_terabytes(media_parts_b)
media_parts_a_not_in_b_file_size_terabytes = get_media_parts_size_in_terabytes(media_parts_a_not_in_b)
media_parts_b_not_in_a_file_size_terabytes = get_media_parts_size_in_terabytes(media_parts_b_not_in_a)
media_parts_a_common_file_size_terabytes = get_media_parts_size_in_terabytes(media_parts_a_common)
media_parts_b_common_file_size_terabytes = get_media_parts_size_in_terabytes(media_parts_b_common)
# print totals & differences count * percentages, with good formatting
# total media items in a
total_media_items_a = len(media_items_a)
print(f"Total media items in a: {total_media_items_a}, ({database_a_filename})")
print(f"Total media items in b: {len(media_items_b)}, ({database_b_filename})")
print(f"Total media parts in a: {len(media_parts_a)}, {media_parts_a_file_size_terabytes}")
print(f"Total media parts in b: {len(media_parts_b)}, {media_parts_b_file_size_terabytes}")
print(f"Media items in a not in b: {len(media_items_a_guids_not_in_b)} ({len(media_items_a_guids_not_in_b) / total_media_items_a * 100:.2f}%)")
print(f"Media items in b not in a: {len(media_items_b_guids_not_in_a)} ({len(media_items_b_guids_not_in_a) / total_media_items_a * 100:.2f}%)")
media_items_a_guids_not_in_b_file = f"{database_a_filename}-files-not-in-{database_b_filename}.txt"
media_items_b_guids_not_in_a_file = f"{database_b_filename}-files-not-in-{database_a_filename}.txt"
print(f"Media parts in a not in b: {len(media_parts_a_not_in_b)} {media_parts_a_not_in_b_file_size_terabytes} ({len(media_parts_a_not_in_b) / total_media_items_a * 100:.2f}%)")
print(f"Media parts in b not in a: {len(media_parts_b_not_in_a)} {media_parts_b_not_in_a_file_size_terabytes} ({len(media_parts_b_not_in_a) / total_media_items_a * 100:.2f}%)")
print(f"Media items common to a and b: {len(media_items_common_to_a_and_b)} ({len(media_items_common_to_a_and_b) / total_media_items_a * 100:.2f}%), sizes are {media_parts_a_common_file_size_terabytes} and {media_parts_b_common_file_size_terabytes} respectively")
with open(media_items_a_guids_not_in_b_file, "w") as f:
for file_name, file_size in media_parts_a_not_in_b:
f.write(file_name+"\n")
with open(media_items_b_guids_not_in_a_file, "w") as f:
for file_name, file_size in media_parts_b_not_in_a:
f.write(file_name+"\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment