yunruse/yt.py

## yt.py
#!/usr/bin/env python3

# pip install scrapetube

# for basic operation, use `python -m http.server --cgi`
# and access /cgi-bin/yt.py?c={CHANNEL_ID}

# Note that due to scrapetube limits, dates are only available as relative
# text (e.g. "3 days ago"), so the further in the past a video is, the less
# precise its date will be.

from email.utils import format_datetime
from datetime import datetime, timedelta
import cgi

import scrapetube

# Length of each unit that may appear in a relative timestamp.
# Months and years are fixed-length approximations (31 and 365 days).
_UNIT_DELTAS = {
    'second': timedelta(seconds=1),
    'minute': timedelta(minutes=1),
    'hour': timedelta(hours=1),
    'day': timedelta(days=1),
    'week': timedelta(days=7),
    'month': timedelta(days=31),
    'year': timedelta(days=365),
}


def determine_date(string: str, dt: "datetime | None" = None) -> datetime:
    """Return a best guess at the date of a video's release.

    Args:
        string: Relative age text ending in ``"<number> <unit> ago"``,
            for example ``"3 days ago"``. Words before those three tokens
            (e.g. a prefix like ``"Streamed"``) are ignored.
        dt: Moment the age is measured from. Defaults to the current
            local time when omitted or ``None``.

    Returns:
        ``dt`` moved back by ``<number>`` units.

    Raises:
        ValueError: If the text does not end in ``<number> <unit> ago`` or
            names a unit that is not in the table above.
    """
    tokens = string.split()
    # Explicit checks instead of ``assert`` so validation survives ``python -O``.
    if len(tokens) < 3 or tokens[-1] != 'ago':
        raise ValueError(f"unrecognised relative date: {string!r}")

    count_text, unit_text = tokens[-3], tokens[-2]
    try:
        count = int(count_text)
    except ValueError:
        raise ValueError(f"unrecognised relative date: {string!r}") from None

    # "days" -> "day"; singular forms pass through unchanged.
    unit = unit_text.removesuffix('s')
    if unit not in _UNIT_DELTAS:
        raise ValueError(f"unknown time unit {unit_text!r} in {string!r}")

    reference = datetime.today() if dt is None else dt
    return reference - count * _UNIT_DELTAS[unit]

# Maximum number of videos requested per feed; passed as ``limit=`` to
# scrapetube.get_channel() in obtain().
FETCH_LIMIT = 15
# Not referenced anywhere in the visible file.
# NOTE(review): presumably a cap on video age in days - confirm or remove.
DAY_LIMIT = 100

def obtain(channel_id: str):
    """Yield ``(estimated_date, video)`` pairs for a channel's videos.

    Args:
        channel_id: Channel identifier handed to ``scrapetube.get_channel``.

    Yields:
        Tuples of the estimated publication ``datetime`` and the video
        mapping returned by scrapetube. At most ``FETCH_LIMIT`` videos are
        requested.
    """
    # Fix the reference moment once. Every "N units ago" text is relative to
    # the present, so each one must be resolved against the same "now" rather
    # than against the previous video's estimate (which would accumulate
    # offsets and push later entries far too deep into the past).
    now = datetime.now()
    for video in scrapetube.get_channel(channel_id, limit=FETCH_LIMIT):
        published_text = video['publishedTimeText']['simpleText']
        yield determine_date(published_text, now), video

# str.format() template for the whole feed document.
# Placeholders: {channel_id} (used in the channel link) and {items}
# (the already-rendered <item> elements).
# NOTE: the literal begins with a newline, so the rendered text starts with
# a blank line before the XML declaration.
RSS = """
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
  <title>Automatically-fetched Youtube Channel</title>
  <link>https://youtube.com/channel/{channel_id}</link>
  {items}
</channel>
</rss>
"""
# str.format() template for a single feed entry.
# Placeholders: {title}, {video_id}, {description}, {date}.
# Values are inserted verbatim; the template itself does no XML escaping.
ITEM = """
<item>
<title>{title}</title>
<link>https://youtube.com/watch?v={video_id}</link>
<description>{description}</description>
<pubDate>{date}</pubDate>
</item>"""

if __name__ == '__main__':
    # CGI entry point: read the ``c`` query parameter, fetch the channel's
    # videos and print an RSS 2.0 document to stdout.

    # Imported locally so this entry point carries its own dependencies.
    from xml.sax.saxutils import escape, quoteattr

    data = cgi.FieldStorage()
    channel_id = data.getfirst('c')
    if not channel_id:
        # No channel requested: say so instead of handing None to the scraper.
        print("Content-Type: text/plain\n")
        print("Missing required query parameter: c")
        raise SystemExit(1)

    # print() appends its own newline, so one explicit "\n" yields exactly the
    # single blank line that separates the CGI headers from the body.
    print("Content-Type: application/rss+xml\n")

    items = [
        ITEM.format(
            # Titles may contain '&' or '<', which must be escaped in XML.
            title=escape(v['title']['runs'][0]['text']),
            # The description carries HTML; the attribute value is quoted
            # first, then the whole fragment is escaped so it is plain
            # character data inside <description>.
            description=escape(
                f"<img src={quoteattr(v['thumbnail']['thumbnails'][-1]['url'])}>"
            ),
            video_id=escape(v['videoId']),
            date=format_datetime(dt),
        )
        for dt, v in obtain(channel_id)
    ]
    # channel_id comes straight from the query string, so escape it too.
    feed = RSS.format(channel_id=escape(channel_id), items='\n'.join(items))
    # The template starts with a newline, but the XML declaration has to be
    # the very first thing in the document.
    print(feed.lstrip())
	#!/usr/bin/env python3

	# pip install scrapetube

	# for basic operation, use `python -m http.server --cgi`
	# and access /cgi-bin/yt.py?c={CHANNEL_ID}

	# Note that due to scrapetube limits, dates are only available as relative
	# text (e.g. "3 days ago"), so the further in the past a video is, the less
	# precise its date will be.

	from email.utils import format_datetime
	from datetime import datetime, timedelta
	import cgi

	import scrapetube

	# Length of each unit that may appear in a relative timestamp.
	# Months and years are fixed-length approximations (31 and 365 days).
	_UNIT_DELTAS = {
	    'second': timedelta(seconds=1),
	    'minute': timedelta(minutes=1),
	    'hour': timedelta(hours=1),
	    'day': timedelta(days=1),
	    'week': timedelta(days=7),
	    'month': timedelta(days=31),
	    'year': timedelta(days=365),
	}


	def determine_date(string: str, dt: "datetime | None" = None) -> datetime:
	    """Return a best guess at the date of a video's release.

	    Args:
	        string: Relative age text ending in ``"<number> <unit> ago"``,
	            for example ``"3 days ago"``. Words before those three tokens
	            (e.g. a prefix like ``"Streamed"``) are ignored.
	        dt: Moment the age is measured from. Defaults to the current
	            local time when omitted or ``None``.

	    Returns:
	        ``dt`` moved back by ``<number>`` units.

	    Raises:
	        ValueError: If the text does not end in ``<number> <unit> ago`` or
	            names a unit that is not in the table above.
	    """
	    tokens = string.split()
	    # Explicit checks instead of ``assert`` so validation survives ``python -O``.
	    if len(tokens) < 3 or tokens[-1] != 'ago':
	        raise ValueError(f"unrecognised relative date: {string!r}")

	    count_text, unit_text = tokens[-3], tokens[-2]
	    try:
	        count = int(count_text)
	    except ValueError:
	        raise ValueError(f"unrecognised relative date: {string!r}") from None

	    # "days" -> "day"; singular forms pass through unchanged.
	    unit = unit_text.removesuffix('s')
	    if unit not in _UNIT_DELTAS:
	        raise ValueError(f"unknown time unit {unit_text!r} in {string!r}")

	    reference = datetime.today() if dt is None else dt
	    return reference - count * _UNIT_DELTAS[unit]

	# Maximum number of videos requested per feed; passed as ``limit=`` to
	# scrapetube.get_channel() in obtain().
	FETCH_LIMIT = 15
	# Not referenced anywhere in the visible file.
	# NOTE(review): presumably a cap on video age in days - confirm or remove.
	DAY_LIMIT = 100

	def obtain(channel_id: str):
	    """Yield ``(estimated_date, video)`` pairs for a channel's videos.

	    Args:
	        channel_id: Channel identifier handed to ``scrapetube.get_channel``.

	    Yields:
	        Tuples of the estimated publication ``datetime`` and the video
	        mapping returned by scrapetube. At most ``FETCH_LIMIT`` videos are
	        requested.
	    """
	    # Fix the reference moment once. Every "N units ago" text is relative
	    # to the present, so each one must be resolved against the same "now"
	    # rather than against the previous video's estimate (which would
	    # accumulate offsets and push later entries far too deep into the past).
	    now = datetime.now()
	    for video in scrapetube.get_channel(channel_id, limit=FETCH_LIMIT):
	        published_text = video['publishedTimeText']['simpleText']
	        yield determine_date(published_text, now), video

	# str.format() template for the whole feed document.
	# Placeholders: {channel_id} (used in the channel link) and {items}
	# (the already-rendered <item> elements).
	# NOTE: the literal begins with a newline and every line inside it starts
	# with a tab, so that whitespace is part of the rendered text.
	RSS = """
	<?xml version="1.0" encoding="UTF-8" ?>
	<rss version="2.0">
	<channel>
	<title>Automatically-fetched Youtube Channel</title>
	<link>https://youtube.com/channel/{channel_id}</link>
	{items}
	</channel>
	</rss>
	"""
	# str.format() template for a single feed entry.
	# Placeholders: {title}, {video_id}, {description}, {date}.
	# Values are inserted verbatim; the template itself does no XML escaping.
	ITEM = """
	<item>
	<title>{title}</title>
	<link>https://youtube.com/watch?v={video_id}</link>
	<description>{description}</description>
	<pubDate>{date}</pubDate>
	</item>"""

	if __name__ == '__main__':
	    # CGI entry point: read the ``c`` query parameter, fetch the channel's
	    # videos and print an RSS 2.0 document to stdout.

	    # Imported locally so this entry point carries its own dependencies.
	    from xml.sax.saxutils import escape, quoteattr

	    data = cgi.FieldStorage()
	    channel_id = data.getfirst('c')
	    if not channel_id:
	        # No channel requested: say so instead of handing None to the scraper.
	        print("Content-Type: text/plain\n")
	        print("Missing required query parameter: c")
	        raise SystemExit(1)

	    # print() appends its own newline, so one explicit "\n" yields exactly
	    # the single blank line that separates the CGI headers from the body.
	    print("Content-Type: application/rss+xml\n")

	    items = [
	        ITEM.format(
	            # Titles may contain '&' or '<', which must be escaped in XML.
	            title=escape(v['title']['runs'][0]['text']),
	            # The description carries HTML; the attribute value is quoted
	            # first, then the whole fragment is escaped so it is plain
	            # character data inside <description>.
	            description=escape(
	                f"<img src={quoteattr(v['thumbnail']['thumbnails'][-1]['url'])}>"
	            ),
	            video_id=escape(v['videoId']),
	            date=format_datetime(dt),
	        )
	        for dt, v in obtain(channel_id)
	    ]
	    # channel_id comes straight from the query string, so escape it too.
	    feed = RSS.format(channel_id=escape(channel_id), items='\n'.join(items))
	    # The template starts with whitespace, but the XML declaration has to
	    # be the very first thing in the document.
	    print(feed.lstrip())