Skip to content

Instantly share code, notes, and snippets.

@jorgeas80
Created February 18, 2019 10:03
Show Gist options
  • Save jorgeas80/588aa6d5f742125dce4a598c7fec6cdc to your computer and use it in GitHub Desktop.
Save jorgeas80/588aa6d5f742125dce4a598c7fec6cdc to your computer and use it in GitHub Desktop.
Script to delete duplicate entries in your wallabag account
#!/usr/bin/env python
# Requires Python > 3.5.2
import aiohttp
import asyncio
from wallabag_api.wallabag import Wallabag
# Base URL of your wallabag instance; for the hosted wallabag.it service
# this is https://app.wallabag.it
my_host = "MY_HOST"

# Page size passed to get_entries. By default that query returns only 30
# entries, so pick a number at least as large as your total entry count to
# retrieve everything in a single request.
ITEMS_PER_PAGE = 2137
async def main(loop):
    """Delete duplicate entries (entries sharing a URL) from a wallabag account.

    Requests an API token, downloads the account's entries, groups the entry
    ids by URL and, for every URL that was saved more than once, deletes all
    of its entries except the first one returned by the server.

    :param loop: event loop handed to the ``aiohttp.ClientSession``.
    """
    # IMPORTANT: Define callback url when you create client id and secret
    # or you'll get HTTP 400 getting the token
    params = {
        'username': 'MY_USERNAME',
        'password': 'MY_PASSWORD',
        'client_id': 'CLIENT_ID',
        'client_secret': 'CLIENT_SECRET',
    }

    # get a new token
    token = await Wallabag.get_token(host=my_host, **params)

    async with aiohttp.ClientSession(loop=loop) as session:
        wall = Wallabag(host=my_host,
                        client_secret=params['client_secret'],
                        client_id=params['client_id'],
                        token=token,
                        aio_sess=session)

        # Map each URL to the ids of every entry saved for it, in the order
        # the server returns them.
        ids_by_url = {}

        # Walk every result page instead of trusting that ITEMS_PER_PAGE is
        # large enough: with a single request, entries beyond the first page
        # would be ignored and their duplicates never removed.
        page = 1
        while True:
            response = await wall.get_entries(page=page,
                                              perPage=ITEMS_PER_PAGE)
            for article in response['_embedded']['items']:
                ids_by_url.setdefault(article['url'], []).append(article['id'])

            # The paginated wallabag response is expected to carry the total
            # page count under 'pages'; if it is missing, behave as a
            # single-page fetch.
            if page >= int(response.get('pages', 1)):
                break
            page += 1

        # Keep the first entry of every URL and delete the rest. The slice
        # is empty for URLs that were saved only once.
        for article_ids in ids_by_url.values():
            for article_id in article_ids[1:]:
                await wall.delete_entries(article_id)

        print("Done")
if __name__ == '__main__':
    # Create and own the event loop explicitly: relying on
    # asyncio.get_event_loop() to create one implicitly is deprecated in
    # recent Python versions, and the loop should be closed once the script
    # has finished so its resources are released.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main(loop))
    finally:
        loop.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment