Last active
November 26, 2022 15:37
-
-
Save jeeger/d13159fefaee33c771be979639900ebc to your computer and use it in GitHub Desktop.
Sum up page counts of books from Calibre library with babashka
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bb | |
(ns bookstats | |
(:require [babashka.pods :as pods] | |
[clojure.pprint :as pprint])) | |
(pods/load-pod 'org.babashka/go-sqlite3 "0.1.0") | |
(pods/load-pod 'retrogradeorbit/bootleg "0.1.9") | |
(require '[pod.babashka.go-sqlite3 :as sqlite] | |
'[pod.retrogradeorbit.hickory.select :as s] | |
'[pod.retrogradeorbit.bootleg.utils :as utils]) | |
(import [java.net URLEncoder] | |
[java.time.temporal ChronoUnit] | |
[java.time Instant] | |
[java.text SimpleDateFormat]) | |
(defn date-after
  "Returns a predicate over rows: true when the row's :timestamp compares
  strictly greater than `arg` according to `.compareTo`."
  [arg]
  (fn [row]
    (let [stamp (:timestamp row)]
      (pos? (.compareTo stamp arg)))))
(defn books-since
  "Queries `database` for every book's title, comma-joined author names and
  timestamp, then lazily keeps the rows accepted by `(date-after since)`."
  [database since]
  (let [newer? (date-after since)
        sql "select b.title, group_concat(a.name, ', ') as authors, b.timestamp from books b join books_authors_link bal on b.id = bal.book join authors a on bal.author = a.id group by b.id"]
    (->> (sqlite/query database sql)
         (filter newer?))))
(defn book-urls
  "Searches Goodreads for `title` plus `authors` and returns a lazy seq of
  absolute URLs, one per anchor with class \"bookTitle\" in the result page."
  [title authors]
  (let [query (URLEncoder/encode (str title " " authors) "utf-8")
        search-url (str "https://www.goodreads.com/search?q=" query "&search_type=books")
        fragments (utils/convert-to (slurp search-url) :hickory-seq)
        ;; Matches the result links: <a class="bookTitle" ...>.
        title-link (s/and (s/tag "a") (s/class "bookTitle"))]
    (->> fragments
         (mapcat (fn [fragment] (s/select title-link fragment)))
         (map (fn [anchor] (get-in anchor [:attrs :href])))
         ;; The hrefs are prefixed with the site host to form full URLs.
         (map (fn [href] (str "https://goodreads.com" href))))))
(defn extract-page-count
  "Fetches `url` and returns the integer found in the first
  <span itemprop=\"numberOfPages\"> whose first content item matches
  \"<digits> pages\". On any Exception (fetch failure, missing span, text not
  matching) prints an error line and returns nil."
  [url]
  (try
    (let [fragments (utils/convert-to (slurp url) :hickory-seq)
          page-span (s/and (s/tag "span")
                           (s/attr "itemprop" (fn [v] (= v "numberOfPages"))))
          span (first (mapcat (fn [fragment] (s/select page-span fragment)) fragments))
          text (first (get span :content))
          ;; A nil text or a non-match ends up throwing below; the catch
          ;; clause turns that into the printed error and a nil result.
          [_ digits] (re-find #"(\d+) pages" text)]
      (Integer/parseInt digits))
    (catch Exception e
      (println (str "Error parsing page count from " url ": " e))
      nil)))
(defn book-with-page-count
  "Takes a row with :title and :authors and returns a map of :title, :authors
  and :pages, where :pages is the first non-nil result of
  `extract-page-count` over the candidate URLs from `book-urls` (nil when
  none of them yields a count)."
  [book]
  (let [{:keys [title authors]} book
        pages (->> (book-urls title authors)
                   (some extract-page-count))]
    ;; array-map keeps the key order title, authors, pages.
    (array-map :title title
               :authors authors
               :pages pages)))
(defn books-since-report
  "Returns a lazy seq with one `book-with-page-count` map for each row
  returned by `(books-since database since)`."
  [database since]
  (->> (books-since database since)
       (map book-with-page-count)))
(defn pretty-print-report
  "Prints a table of the books reported by `books-since-report`, followed by
  the total page count and the average number of pages per day since `since`.

  database - passed through unchanged to `books-since-report`.
  since    - a java.util.Date (`.toInstant` is called on it).

  Books whose :pages is nil (no page count could be determined) still appear
  in the table but are left out of the total. Returns nil."
  [database since]
  (let [table (books-since-report database since)
        ;; `keep` drops nil page counts; summing them with + would throw.
        total (reduce + (keep :pages table))
        elapsed-days (.between ChronoUnit/DAYS (.toInstant since) (Instant/now))
        ;; A start date less than a full day ago gives 0 elapsed days (and a
        ;; future date a negative count); clamp to 1 so the division cannot
        ;; fail with divide-by-zero or yield a negative average.
        perday (/ total (max 1 elapsed-days))]
    (pprint/print-table table)
    (print (str "Total pages: " total ",per day: " (float perday)))
    ;; `print` emits no newline and does not flush on its own; flush so the
    ;; summary line is not lost when the script exits.
    (flush)))
;; Script entry point: the first command-line argument is handed to
;; pretty-print-report as the database, the second is parsed as a yyyy-MM-dd
;; start date.
(let [[database since-text] *command-line-args*
      since (.parse (SimpleDateFormat. "yyyy-MM-dd") since-text)]
  (pretty-print-report database since))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment