# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @Timvrakas
# Created August 1, 2024 08:13
# Show Gist options
#   • Save Timvrakas/f624a2a69f23664d019ffd94974591b3 to your computer and use it in GitHub Desktop.
# Save Timvrakas/f624a2a69f23664d019ffd94974591b3 to your computer and use it in GitHub Desktop.
from requests_html import HTMLSession
import requests
from bs4 import BeautifulSoup as bs # importing BeautifulSoup
import json
from IPython import embed
import time
import unicodedata
import re
def slugify(value):
    """Return a lowercase, ASCII-only, hyphen-separated slug for *value*.

    The value is converted with ``str()``, accented characters are reduced
    to their ASCII base letters (characters with no ASCII form are dropped),
    anything that is not a word character, whitespace or hyphen is removed,
    and each run of whitespace/hyphens collapses to a single ``-``.
    Leading and trailing hyphens and underscores are stripped, so the result
    may be an empty string.
    """
    value = str(value)
    # NFKD splits accented letters into base letter + combining mark; the
    # ASCII round-trip then discards the marks and any other non-ASCII text.
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value.lower())
    return re.sub(r'[-\s]+', '-', value).strip('-_')
# Root URL of the Canvas REST API (v1) on Stanford's instance; endpoint paths
# such as 'courses' are appended to it when building request URLs.
base_url = "https://canvas.stanford.edu/api/v1/"
class BearerAuth(requests.auth.AuthBase):
    """``requests`` auth hook that attaches a bearer token to every request.

    Args:
        token: API access token. Defaults to the ``'<TOKEN_HERE>'``
            placeholder so existing ``BearerAuth()`` calls keep working;
            replace the placeholder (or pass a real token) before running.
    """

    def __init__(self, token='<TOKEN_HERE>'):
        self.token = token

    def __call__(self, r):
        """Set the ``authorization`` header on the outgoing request *r* and return it."""
        r.headers["authorization"] = "Bearer " + self.token
        return r
def final_page(response):
    """Return True when *response* is the last page of a paginated listing.

    The decision is based on the response's ``Link`` header:

    * no ``Link`` header at all -> treated as a single, final page;
    * both ``current`` and ``last`` links present -> final iff they are equal;
    * otherwise (``last`` omitted) -> final iff there is no ``next`` link.

    Args:
        response: An object exposing a ``headers`` mapping with a ``get``
            method (e.g. a ``requests.Response``).

    Returns:
        bool: True if no further pages need to be fetched.
    """
    header = response.headers.get("link")
    if not header:
        return True

    # Each entry looks like `<url>; rel="name"`. Matching the entries directly
    # (instead of splitting on ',' and ';') tolerates optional whitespace
    # after the separators and commas inside a URL.
    links = {
        rel: url
        for url, rel in re.findall(r'<([^>]*)>\s*;\s*rel="?([^",;\s]+)"?', header)
    }

    if "current" in links and "last" in links:
        return links["current"] == links["last"]
    # `last` is not guaranteed to be present; fall back to "nothing follows".
    return "next" not in links
# Fetch the course list visible to the token's user (one page of up to 100).
response = requests.get(
    base_url + 'courses',
    auth=BearerAuth(),
    params={'per_page': '100'},
)
# Surface HTTP failures (bad token, outage) here instead of as a confusing
# JSON/KeyError further down.
response.raise_for_status()
courses = response.json()

# Entries that carry no 'name' key are counted as locked and skipped.
real_courses = [course for course in courses if 'name' in course]
locked = len(courses) - len(real_courses)

# Only a single page is read; refuse to continue with a partial course list.
# (An explicit raise is used because `assert` disappears under `python -O`.)
if not final_page(response):
    raise RuntimeError("Course list spans more than one page; pagination is not handled")

print("\n")
print(f"Found {len(courses)} courses, {len(real_courses)} available and {locked} locked")
print("\n")
# Collect the root folder (the one without a parent) of every available
# course; each entry records what the export step needs.
files_to_download = []
for c in real_courses:
    course_id = c['id']  # named course_id so the builtin `id` is not shadowed
    folders_url = base_url + f'courses/{course_id}/folders'
    files_response = requests.get(folders_url, auth=BearerAuth(), params={'per_page': '100'})

    # A course whose folder listing cannot be read should not abort the
    # whole run: report it and move on to the next course.
    if not files_response.ok:
        print(f"Skipping {c.get('course_code', course_id)}: "
              f"folder listing returned HTTP {files_response.status_code}")
        continue

    # Only one page of folders is read; a root folder on a later page would
    # be missed silently, so fail loudly instead.
    if not final_page(files_response):
        raise RuntimeError(f"Folder list for course {course_id} spans more than one page")

    for folder in files_response.json():
        if folder['parent_folder_id'] is not None:
            continue
        # Sanity check carried over from the original script: the root
        # folder is expected to be called "course files".
        if folder['full_name'] != "course files":
            raise RuntimeError(
                f"Unexpected root folder name {folder['full_name']!r} in course {course_id}")
        files_to_download.append({
            'folder_id': folder['id'],
            'course_id': course_id,
            'name': folder['full_name'],
            'course_code': c['course_code'],
        })
print(f"Collected {len(files_to_download)} root folders to download!")
# Seconds to wait between polls while Canvas prepares an export.
POLL_INTERVAL_S = 5
# How many times to re-fetch a finished export while waiting for its
# "attachment" entry before giving up.
ATTACHMENT_RETRIES = 12

# For every collected root folder: request a zip export, wait for it to be
# built, then download it to "<slugified course code>.zip" in the current
# working directory.
for f in files_to_download:
    print(f['course_code'])
    print(f['name'])

    # 1. Start a zip export restricted to this course's root folder.
    exports_url = base_url + f"courses/{f['course_id']}/content_exports"
    params = {
        'export_type': 'zip',
        #'skip_notifications':'true',
        'select[folders][]': f['folder_id'],
    }
    dl_resp = requests.post(exports_url, auth=BearerAuth(), params=params)
    dl_resp.raise_for_status()
    export = dl_resp.json()
    dl_id = export['id']
    dl_progress_url = export['progress_url']

    # 2. Poll the progress object until the export reports 100 %.
    while True:
        prog_resp = requests.get(dl_progress_url, auth=BearerAuth())
        prog_resp.raise_for_status()
        prog = prog_resp.json()
        print(prog['completion'])
        if prog['completion'] == 100:
            break
        # A failed job never reaches 100 %; stop instead of polling forever.
        if prog.get('workflow_state') == 'failed':
            raise RuntimeError(f"Export {dl_id} for {f['course_code']} failed")
        time.sleep(POLL_INTERVAL_S)

    # 3. Fetch the export record until it carries an attachment.
    # NOTE(review): the original slept 10 s here and tried once, presumably
    # because the attachment can appear slightly after progress hits 100 %.
    # On failure it then reused the previous course's attachment; bounded
    # retries plus an explicit error avoid that.
    export_url = f"{exports_url}/{dl_id}"
    attachment = None
    for _ in range(ATTACHMENT_RETRIES):
        time.sleep(POLL_INTERVAL_S)
        done_resp = requests.get(export_url, auth=BearerAuth())
        done_resp.raise_for_status()
        dl_done = done_resp.json()
        print(dl_done)
        attachment = dl_done.get("attachment")
        if attachment:
            break
    if not attachment:
        raise RuntimeError(f"Export {dl_id} for {f['course_code']} finished without an attachment")

    # 4. Stream the zip to disk so large exports are not held in memory and
    # the file handle is always closed.
    # Fall back to the course id if the course code slugifies to nothing.
    filename = (slugify(f['course_code']) or f"course-{f['course_id']}") + '.zip'
    with requests.get(attachment['url'], auth=BearerAuth(), stream=True) as file_resp:
        file_resp.raise_for_status()
        with open(filename, 'wb') as out:
            for chunk in file_resp.iter_content(chunk_size=1 << 20):
                out.write(chunk)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment