Created
August 1, 2024 08:13
-
-
Save Timvrakas/f624a2a69f23664d019ffd94974591b3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
import re
import time
import unicodedata

import requests
from bs4 import BeautifulSoup as bs  # importing BeautifulSoup
from IPython import embed
from requests_html import HTMLSession
def slugify(value):
    """Return a lowercase, ASCII-only, hyphen-separated slug of *value*.

    *value* is converted with ``str()`` first, so non-string input is
    accepted. Characters that cannot be represented in ASCII after NFKD
    decomposition are dropped, anything other than word characters,
    whitespace and hyphens is removed, and runs of whitespace/hyphens
    collapse to a single ``-``. Leading and trailing ``-``/``_`` are stripped.
    """
    value = str(value)
    # NFKD splits accented letters into base letter + combining mark; the
    # ASCII round-trip then discards the marks (and any other non-ASCII).
    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value.lower())
    return re.sub(r'[-\s]+', '-', value).strip('-_')
# Root of the Canvas REST API; endpoint paths are appended directly, so the
# trailing slash is required.
base_url = "https://canvas.stanford.edu/api/v1/"
class BearerAuth(requests.auth.AuthBase):
    """Auth hook for ``requests`` that adds a bearer-token header.

    Pass an instance as ``auth=`` to a ``requests`` call; the instance is
    invoked with the outgoing request and sets its ``authorization`` header.

    Args:
        token: API access token. When omitted, the ``CANVAS_API_TOKEN``
            environment variable is used if set; otherwise the placeholder
            ``'<TOKEN_HERE>'`` is kept so existing ``BearerAuth()`` calls
            behave as before.
    """

    def __init__(self, token=None):
        if token is None:
            # Prefer the environment over a secret committed to the source.
            token = os.environ.get('CANVAS_API_TOKEN', '<TOKEN_HERE>')
        self.token = token

    def __call__(self, r):
        """Attach the bearer token to request *r* and return it."""
        r.headers["authorization"] = "Bearer " + self.token
        return r
def final_page(response):
    """Return True when *response* is the last page of a paginated listing.

    Uses ``response.links`` (the parsed ``Link`` header, keyed by ``rel``)
    instead of splitting the raw header by hand, which failed with
    ``KeyError`` when the header or one of the expected ``rel`` values was
    missing.

    Args:
        response: a ``requests.Response`` (anything exposing ``.links``).

    Returns:
        bool: when both ``current`` and ``last`` links are present, whether
        their URLs are equal; otherwise whether there is no ``next`` link.
    """
    links = response.links
    current = links.get("current")
    last = links.get("last")
    if current is not None and last is not None:
        return current.get("url") == last.get("url")
    # Without a usable current/last pair, the absence of a "next" link is
    # taken to mean there is nothing further to fetch.
    return "next" not in links
# Fetch the course list (a single page of up to 100 entries is requested).
response = requests.get(
    base_url + 'courses',
    auth=BearerAuth(),
    params={'per_page': '100'},
)
# Fail with a clear HTTP error (e.g. bad token) instead of mis-parsing an
# error body as a course list.
response.raise_for_status()
courses = response.json()

# Entries without a 'name' key are counted as "locked" and skipped
# (presumably access-restricted courses -- confirm against the Canvas API).
real_courses = [course for course in courses if 'name' in course]
locked = len(courses) - len(real_courses)

# Only one page is processed; stop explicitly rather than silently ignoring
# further pages. A raise is used because asserts are stripped under -O.
if not final_page(response):
    raise RuntimeError(
        "Course list spans more than one page; pagination is not implemented"
    )

print("\n")
print(f"Found {len(courses)} courses, {len(real_courses)} available and {locked} locked")
print("\n")
# Collect the root folder of every available course. Each entry records
# the folder id, the course id, the folder's full name and the course code.
files_to_download = []
for c in real_courses:
    course_id = c['id']  # not named `id`, which would shadow the builtin
    url = base_url + f'courses/{course_id}/'
    files_response = requests.get(
        url + 'folders',
        auth=BearerAuth(),
        params={'per_page': '100'},
    )
    files_response.raise_for_status()
    # Only the first page of folders is inspected, so refuse to continue
    # if the listing is paginated.
    if not final_page(files_response):
        raise RuntimeError(
            f"Folder list for course {course_id} spans more than one page; "
            "pagination is not implemented"
        )
    files_response = files_response.json()
    for f in files_response:
        # A folder without a parent is treated as the course's root folder.
        if f['parent_folder_id'] is None:
            if f['full_name'] != "course files":
                raise RuntimeError(
                    f"Unexpected root folder name {f['full_name']!r} "
                    f"in course {course_id}"
                )
            files_to_download.append({
                'folder_id': f['id'],
                'course_id': c['id'],
                'name': f['full_name'],
                'course_code': c['course_code'],
            })

print(f"Collected {len(files_to_download)} root folders to download!")
# For each collected root folder: request a zip content export, wait for it
# to finish, then download the resulting archive to "<course-code-slug>.zip"
# in the current working directory.
PROGRESS_POLL_SECONDS = 5      # delay between progress checks
ATTACHMENT_POLL_SECONDS = 10   # delay between checks for the finished file
ATTACHMENT_MAX_ATTEMPTS = 12   # give up after roughly two minutes
DOWNLOAD_CHUNK_BYTES = 1 << 20

for f in files_to_download:
    print(f['course_code'])
    print(f['name'])

    exports_url = base_url + f"courses/{f['course_id']}/content_exports"
    params = {
        'export_type': 'zip',
        # 'skip_notifications': 'true',  # optional; left disabled as in the original
        'select[folders][]': f['folder_id'],
    }
    dl_resp = requests.post(exports_url, auth=BearerAuth(), params=params)
    if dl_resp.status_code != 200:
        raise RuntimeError(
            f"Export request for {f['course_code']} failed "
            f"with HTTP {dl_resp.status_code}"
        )
    dl_resp = dl_resp.json()
    dl_id = dl_resp['id']
    dl_progress_url = dl_resp['progress_url']

    # Poll the progress object until the export reports 100% completion.
    while True:
        prog_resp = requests.get(dl_progress_url, auth=BearerAuth())
        prog_resp.raise_for_status()
        prog = prog_resp.json()
        print(prog['completion'])
        # Without this check a failed export would never reach 100 and the
        # loop would spin forever.
        if prog.get('workflow_state') == 'failed':
            raise RuntimeError(f"Export for {f['course_code']} failed")
        if prog['completion'] == 100:
            break
        time.sleep(PROGRESS_POLL_SECONDS)

    # The attachment may not be listed the moment progress hits 100, so
    # poll the export itself. The URL is built once, outside the loop, so
    # retries do not keep appending the export id to it.
    export_url = f"{exports_url}/{dl_id}"
    attachment = None
    for _attempt in range(ATTACHMENT_MAX_ATTEMPTS):
        time.sleep(ATTACHMENT_POLL_SECONDS)
        dl_done = requests.get(export_url, auth=BearerAuth())
        dl_done.raise_for_status()
        dl_done = dl_done.json()
        print(dl_done)
        if dl_done.get('workflow_state') == 'failed':
            raise RuntimeError(f"Export for {f['course_code']} failed")
        attachment = dl_done.get('attachment')
        if attachment and attachment.get('url'):
            break
    else:
        raise RuntimeError(
            f"Export for {f['course_code']} finished but no attachment "
            "became available"
        )

    dl_file_url = attachment['url']
    filename = slugify(f['course_code']) + '.zip'
    # Stream to disk so large archives are not held in memory, and close
    # both the connection and the file deterministically.
    with requests.get(dl_file_url, auth=BearerAuth(), stream=True) as file:
        # Never write an HTTP error page out as a .zip.
        file.raise_for_status()
        with open(filename, 'wb') as out:
            for chunk in file.iter_content(chunk_size=DOWNLOAD_CHUNK_BYTES):
                out.write(chunk)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment