Created
February 18, 2019 10:03
-
-
Save jorgeas80/588aa6d5f742125dce4a598c7fec6cdc to your computer and use it in GitHub Desktop.
Script to delete duplicate entries in your wallabag account
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Requires Python > 3.5.2 | |
import aiohttp | |
import asyncio | |
from wallabag_api.wallabag import Wallabag | |
# If you use wallabag.it this is: https://app.wallabag.it | |
my_host = 'MY_HOST' | |
# By default get_entries returns only 30 entries per page. Set this to a number
# at least as large as your entry count so that one request can return them all.
ITEMS_PER_PAGE = 2137 | |
async def main(loop):
    """Delete duplicate entries (same URL saved more than once) from wallabag.

    Every entry of the account is fetched, entry ids are grouped by URL, and
    for each URL that occurs more than once all entries except the first one
    returned by the API are deleted. Prints "Done" when finished.

    Args:
        loop: asyncio event loop handed to the aiohttp client session.
    """
    # IMPORTANT: Define callback url when you create client id and secret or you'll get HTTP 400 getting the token
    params = {'username': 'MY_USERNAME',
              'password': 'MY_PASSWORD',
              'client_id': 'CLIENT_ID',
              'client_secret': 'CLIENT_SECRET'}

    # get a new token
    token = await Wallabag.get_token(host=my_host, **params)

    # initializing
    async with aiohttp.ClientSession(loop=loop) as session:
        wall = Wallabag(host=my_host,
                        client_secret=params['client_secret'],
                        client_id=params['client_id'],
                        token=token,
                        aio_sess=session)

        # Group entry ids by URL. All pages are read BEFORE anything is
        # deleted, so deletions cannot shift entries between pages mid-scan.
        ids_by_url = {}
        page = 1
        while True:
            response = await wall.get_entries(page=page, perPage=ITEMS_PER_PAGE)
            for article in response['_embedded']['items']:
                ids_by_url.setdefault(article['url'], []).append(article['id'])

            # The wallabag entries response carries the total page count in
            # 'pages'; if it is missing, treat the result as a single page.
            if page >= response.get('pages', 1):
                break
            page += 1

        # Delete duplicates: keep the first id seen for each URL, drop the rest.
        for article_ids in ids_by_url.values():
            for article_id in article_ids[1:]:
                await wall.delete_entries(article_id)

        print("Done")
if __name__ == '__main__':
    # Create the loop explicitly: asyncio.get_event_loop() without a running
    # loop is deprecated in recent Python versions. new_event_loop() is also
    # available on the Python 3.5 baseline this script targets.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main(loop))
    finally:
        # Always release the loop's resources, even if main() raised.
        loop.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment