Created
December 7, 2016 09:14
-
-
Save sxlijin/b2a14305e06c901492f1c6f23c9063e5 to your computer and use it in GitHub Desktop.
Parse scraped VOICE data into CSVs for analysis.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from glob import glob as ls | |
import statistics | |
import itertools as it | |
import re | |
def expand(field):
    """Turn a response histogram into a stream of individual scores.

    Only the first five entries of ``field`` are used; each must be a
    two-item ``(choice, freq)`` pair.  The entry at 0-based position ``i``
    is assigned the score ``i + 1`` and that score is repeated ``freq``
    times.  Anything past the fifth entry is ignored.

    Returns an ``itertools.chain`` iterator yielding the scores in
    ascending order.
    """
    # Build every run up front so malformed entries fail at call time,
    # not later when the iterator is consumed.
    runs = [[score] * freq
            for score, (_, freq) in enumerate(field[:5], start=1)]
    return it.chain.from_iterable(runs)
def median(field):
    """Return ``statistics.median`` of the scores from ``expand(field)``."""
    scores = expand(field)
    return statistics.median(scores)
def mean(field):
    """Return ``statistics.mean`` of the scores from ``expand(field)``."""
    scores = expand(field)
    return statistics.mean(scores)
def pvar(field):
    """Return ``statistics.pvariance`` of the scores from ``expand(field)``."""
    scores = expand(field)
    return statistics.pvariance(scores)
def _read_fields(path):
    """Parse one scraped file into per-question response tallies.

    Each line of the file is split on ``:`` into items, and each item is
    split on ``,`` into an ``answer,count`` pair.  The result has one entry
    per line: a list of ``(answer, count)`` tuples with ``count`` converted
    to ``int``.

    Raises ``ValueError`` if an item does not split into exactly two parts
    or its count is not an integer.
    """
    # Plain 'r' already performs universal-newline translation in Python 3.
    # The legacy 'rU' mode was removed in Python 3.11 and raises ValueError
    # there.  The ``with`` block also closes the handle deterministically.
    with open(path, 'r') as handle:
        return [
            [(choice, int(freq))
             for (choice, freq)
             in (item.split(',') for item in line.strip().split(':'))]
            for line in handle
        ]


def main():
    """Print one CSV row per file matched by ``downloads/*``.

    Each row is ``class_level,wt_avg_challs`` formatted with ``%f,%f``:
    ``class_level`` is the first decimal digit found in the file's path and
    ``wt_avg_challs`` is the mean score for question 7.

    Raises ``AttributeError`` if a path contains no digit, and propagates
    any error from parsing or from the statistics helpers.
    """
    for f in ls('downloads/*'):
        # fields is a list where each entry is the responses to a survey
        # question, formatted as
        #
        # [ [answer1, #], [answer2, #], ..., [answer5, #], [no response , #] ]
        #
        #  0: effectiveness of communication
        #  1: helpfulness of instructor outside class
        #  2: effectiveness of instructor at stimulating interest
        #  3: overall rating of instructor
        #  4: grading standards of course
        #  5: requirements for course
        #  6: how much was learned in the course
        #  7: how effective course was at intellectual challenge
        #  8: overall rating of course
        #  9: reason students took the course
        # 10: interest in subject prior to course
        # 11: hours spent on course per week
        fields = _read_fields(f)

        # WARNING: fields[x][5] always corresponds to the # that did not respond

        # The metrics below are all computed, but only wt_avg_challs is
        # currently written to the output row.
        proportion_why = (sum(freq for _, freq in fields[9][0:3])
                          / sum(count for _, count in fields[9][:5]))
        wt_avg_rating = mean(fields[8])
        wt_avg_challs = mean(fields[7])
        wt_avg_learned = mean(fields[6])
        pvar_rating = pvar(fields[8])

        # The search runs over the whole path, so the first digit anywhere
        # in it (directory or file name) is taken as the class level.
        class_level = int(re.search('[0-9]', f).group())
        print('%f,%f' % (class_level, wt_avg_challs))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment