Created
September 30, 2020 18:38
-
-
Save ansemjo/7f3d47fa31439ab137a2df2f510c4f5d to your computer and use it in GitHub Desktop.
Parse a Humble Bundle book-bundle downloads page and output a more useful JSON file for bulk downloading.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# download multiple books from a humblebundle books page at once
# the "bulk download" button on their site is useless ...
# you *might* need to copy / export the page html from developer tools
"""Parse a Humble Bundle book downloads page (HTML on stdin) and write a
JSON list of ``{"title": ..., "downloads": [...]}`` objects to stdout."""
import json
import sys

from pyquery import PyQuery as pq


def parse_downloads(html):
    """Scrape download metadata from a Humble Bundle downloads page.

    Each ``div.row`` element on the page is one book: its title is the
    text of the ``div.title > a`` link, and its download URLs are the
    ``href`` attributes of the ``div.download a.a`` anchors.

    :param html: page HTML as a string
    :return: list of ``{"title": str, "downloads": [str, ...]}`` dicts
    """
    page = pq(html)
    return [
        {
            "title": row("div.title > a").text(),
            "downloads": [pq(a).attr("href") for a in row("div.download a.a")],
        }
        # each row element must be re-wrapped in pq() to query within it
        for row in (pq(r) for r in page("div.row"))
    ]


def main():
    """Read page HTML from stdin and dump the download list as JSON to stdout."""
    json.dump(parse_downloads(sys.stdin.read()), sys.stdout)


if __name__ == "__main__":
    main()

# now you can export a plaintext list for parsing in
# bash from this json with a little more python:
#
#   for book in downloads:
#       for link in book["downloads"]:
#           print(link, book["title"])
#
# and then download books of each filetype with a simple while-read-do-loop:
#
#   grep '\.epub' downloads.txt | while read link title; do echo "$title"; curl -# -o "$title.epub" "$link"; done
#   grep '\.pdf' downloads.txt | while read link title; do echo "$title"; curl -# -o "$title.pdf" "$link"; done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment