NHentai Archiver

Since the latest DMCA takedowns targeting NHentai, I thought it would be good to revisit my previous archiver. This one took only about 15 minutes to write, thanks to the nhentai downloader by RicterZ (https://github.com/RicterZ/nhentai). Fantastic project. This script simply wraps that downloader and adds a few extras to make sure everything gets downloaded.

Steps

  1. Get your user agent and cookie from NHentai.net. This is explained in the RicterZ downloader repo linked above; it requires registering an account on the site.
  2. Add your proxy. If you don't have (or need) a proxy, comment out the `--proxy` line in `main()`.
  3. Set the output folder. The path can be relative or absolute.
  4. Set `MAX_RANGE`. This prevents overshooting the highest available doujin ID and keeps future doujins from being marked as missing (if you'd rather not hard-code it, see the sketch after this list).
  5. Run the script with `python3 main.py`.
  6. Optional: set `MAX_THREADS` higher than 5. More threads make downloads go faster; if you are on a proxy this is perfect, so max it out as high as your machine will allow.
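
If you'd rather not hard-code `MAX_RANGE`, here is a minimal sketch that estimates the newest gallery ID by scraping the homepage. The `latest_gallery_id` helper is hypothetical (it is not part of the script below or the nhentai CLI) and assumes the homepage still links recent galleries as `/g/<id>/`; pass in the same user agent and cookie you configured in step 1.

```python
import re

import requests


def latest_gallery_id(user_agent: str, cookie: str) -> int:
    """Return the highest gallery ID linked from the nhentai homepage."""
    resp = requests.get(
        "https://nhentai.net/",
        headers={"User-Agent": user_agent, "Cookie": cookie},
        timeout=30,
    )
    resp.raise_for_status()
    # Gallery links look like /g/528482/; grab every ID and take the max.
    ids = [int(m) for m in re.findall(r"/g/(\d+)/", resp.text)]
    if not ids:
        raise RuntimeError("No gallery links found; the page layout may have changed.")
    return max(ids)
```

You could then set `MAX_RANGE = latest_gallery_id(USER_AGENT, COOKIE)` instead of a fixed number.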
```python
import concurrent.futures
import subprocess

import pickledb

# Configuration: fill these in before running.
USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0"
COOKIE = "<COOKIE>"
PROXY = "<PROXY>"
MAX_RANGE = 528482   # highest gallery ID to attempt
OUTPUT_DIR = "<OUTPUTDIR>"
MAX_THREADS = 5      # raise this for faster downloads, especially behind a proxy


def markMissing(page: str):
    # Record a gallery ID that nhentai reports as nonexistent.
    db = pickledb.load('invalid.db', True, False)
    db.set(page, True)
    db.dump()


def checkMissing(page: str) -> bool:
    # Return True if this gallery was previously marked as missing.
    db = pickledb.load('invalid.db', True, False)
    try:
        missing = db.get(page)
    except Exception:
        missing = False
    if missing:
        print(f"{page} is marked as missing. https://nhentai.net/g/{page}.")
        return True
    return False


def download(page: int):
    try:
        print(f"Downloading {page}. https://nhentai.net/g/{page}.")
        process = subprocess.Popen(
            f"nhentai --id {page} --output {OUTPUT_DIR} --cbz --pdf --meta --regenerate-cbz",
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=True,
        )
        out, err = process.communicate()
        result_lines = out.decode().split("\n")
        # nhentai prints "<id> cannot be found" near the end of its output
        # when a gallery does not exist; detect that and remember the ID.
        err_line = result_lines[-3] if len(result_lines) >= 3 else ""
        if err_line.find(f"{page} cannot be found") != -1:
            markMissing(f"{page}")
            print(f"Marked {page} as missing. https://nhentai.net/g/{page}.")
        else:
            print(f"Downloaded {page}. https://nhentai.net/g/{page}.")
    except Exception as e:
        print(f"Error downloading {page}. https://nhentai.net/g/{page}.")
        print(e)


def main():
    # Persist the cookie, user agent, and proxy in nhentai's own config.
    subprocess.run(["nhentai", "--cookie", COOKIE])
    subprocess.run(["nhentai", "--useragent", USER_AGENT])
    subprocess.run(["nhentai", "--proxy", PROXY])  # comment out if you have no proxy
    pool = concurrent.futures.ThreadPoolExecutor(max_workers=MAX_THREADS)
    for i in range(MAX_RANGE):
        if checkMissing(f"{i}"):
            continue
        pool.submit(download, i)
    pool.shutdown(wait=True)
    # Regenerate the browsable HTML index for the downloaded galleries.
    subprocess.run(["nhentai", "--html"])
    subprocess.run(["nhentai", "--gen-main"])
    print("Finished. Exiting.")


if __name__ == '__main__':
    main()
```
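
After a run finishes, `invalid.db` holds every ID the script marked as missing. Here is a minimal sketch that lists them, assuming the file was written by the script above (pickledb stores the IDs as string keys):

```python
import pickledb

# Open with auto_dump=False so we never write back to the file.
db = pickledb.load("invalid.db", False)
missing = sorted(int(k) for k in db.getall())
print(f"{len(missing)} galleries marked as missing")
for page in missing:
    print(f"https://nhentai.net/g/{page}")
```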