# sxlijin/parse_voice_data.py

## parse_voice_data.py
#!/usr/bin/env python3

from glob import glob as ls
import statistics
import itertools as it
import re

def expand(field):
    """Expand one question's frequency table into individual scores.

    ``field`` is a sequence of ``(choice, freq)`` pairs.  Only the first
    five entries are used; any further entries are ignored.  The entry at
    0-based position ``i`` is given the score ``i + 1``, and that score is
    emitted ``freq`` times.

    Returns a lazy iterator over the scores, in ascending order.
    """
    # chain.from_iterable + repeat streams the scores without unpacking the
    # whole generator into call arguments or building a list per score.
    return it.chain.from_iterable(
        it.repeat(score, freq)
        for score, (_, freq) in enumerate(field[:5], start=1)
    )

def median(field):
    """Return the median score of one question's ``(choice, freq)`` table.

    The table is first turned into individual scores by ``expand``.
    """
    scores = expand(field)
    return statistics.median(scores)

def mean(field):
    """Return the mean score of one question's ``(choice, freq)`` table.

    The table is first turned into individual scores by ``expand``.
    """
    scores = expand(field)
    return statistics.mean(scores)

def pvar(field):
    """Return the population variance of one question's scores.

    The ``(choice, freq)`` table is first turned into individual scores by
    ``expand``.
    """
    scores = expand(field)
    return statistics.pvariance(scores)

def main():
    """Print one ``class_level,mean_challenge`` line per file in ``downloads/``.

    Each file is read as one survey question per line, with the answers
    separated by ``:`` and each answer written as ``choice,count``.  The
    class level is the first decimal digit found in the file's path.

    Raises:
        AttributeError: if a file's path contains no digit (``re.search``
            returns ``None``).
        ValueError: if an answer is not exactly ``choice,count`` or the
            count is not an integer.
    """
    for f in ls('downloads/*'):
        # fields is a list where each entry is the responses to a survey
        # question, formatted as
        #
        # [ [answer1, #], [answer2, #], ..., [answer5, #], [no response , #] ]

        #  0: effectiveness of communication
        #  1: helpfulness of instructor outside class
        #  2: effectiveness of instructor at stimulating interest
        #  3: overall rating of instructor
        #  4: grading standards of course
        #  5: requirements for course
        #  6: how much was learned in the course
        #  7: how effective course was at intellectual challenge
        #  8: overall rating of course
        #  9: reason students took the course
        # 10: interest in subject prior to course
        # 11: hours spent on course per week

        # Plain text mode already applies universal newlines; the old 'rU'
        # mode flag was removed in Python 3.11.  The ``with`` block closes
        # the file as soon as its lines have been parsed.
        with open(f) as survey:
            fields = [
                [(choice, int(freq))
                 for (choice, freq)
                 in (item.split(',') for item in line.strip().split(':'))]
                for line in survey
            ]
        # WARNING: fields[x][5] always corresponds to the # that did not respond

        # NOTE(review): everything below except wt_avg_challs is computed but
        # not included in the printed output -- presumably kept so the output
        # column can be swapped; confirm before removing.
        proportion_why = (sum(freq for _, freq in fields[9][0:3])
                          / sum(count for _, count in fields[9][:5]))

        wt_avg_rating = mean(fields[8])
        wt_avg_challs = mean(fields[7])
        wt_avg_learned = mean(fields[6])

        pvar_rating = pvar(fields[8])

        # First digit anywhere in the path is taken as the class level.
        class_level = int(re.search('[0-9]', f).group())

        print('%f,%f' % (class_level, wt_avg_challs))

# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
	#!/usr/bin/env python3

	from glob import glob as ls
	import statistics
	import itertools as it
	import re

	def expand(field):
	    """Expand one question's frequency table into individual scores.

	    ``field`` is a sequence of ``(choice, freq)`` pairs.  Only the first
	    five entries are used; any further entries are ignored.  The entry at
	    0-based position ``i`` is given the score ``i + 1``, and that score is
	    emitted ``freq`` times.

	    Returns a lazy iterator over the scores, in ascending order.
	    """
	    # chain.from_iterable + repeat streams the scores without unpacking the
	    # whole generator into call arguments or building a list per score.
	    return it.chain.from_iterable(
	        it.repeat(score, freq)
	        for score, (_, freq) in enumerate(field[:5], start=1)
	    )

	def median(field):
	    """Return the median score of one question's ``(choice, freq)`` table.

	    The table is first turned into individual scores by ``expand``.
	    """
	    return statistics.median(expand(field))

	def mean(field):
	    """Return the mean score of one question's ``(choice, freq)`` table.

	    The table is first turned into individual scores by ``expand``.
	    """
	    return statistics.mean(expand(field))

	def pvar(field):
	    """Return the population variance of one question's scores.

	    The ``(choice, freq)`` table is first turned into individual scores by
	    ``expand``.
	    """
	    return statistics.pvariance(expand(field))

	def main():
	    """Print one ``class_level,mean_challenge`` line per file in ``downloads/``.

	    Each file is read as one survey question per line, with the answers
	    separated by ``:`` and each answer written as ``choice,count``.  The
	    class level is the first decimal digit found in the file's path.

	    Raises:
	        AttributeError: if a file's path contains no digit (``re.search``
	            returns ``None``).
	        ValueError: if an answer is not exactly ``choice,count`` or the
	            count is not an integer.
	    """
	    for f in ls('downloads/*'):
	        # fields is a list where each entry is the responses to a survey
	        # question, formatted as
	        #
	        # [ [answer1, #], [answer2, #], ..., [answer5, #], [no response , #] ]

	        # 0: effectiveness of communication
	        # 1: helpfulness of instructor outside class
	        # 2: effectiveness of instructor at stimulating interest
	        # 3: overall rating of instructor
	        # 4: grading standards of course
	        # 5: requirements for course
	        # 6: how much was learned in the course
	        # 7: how effective course was at intellectual challenge
	        # 8: overall rating of course
	        # 9: reason students took the course
	        # 10: interest in subject prior to course
	        # 11: hours spent on course per week

	        # Plain text mode already applies universal newlines; the old 'rU'
	        # mode flag was removed in Python 3.11.  The ``with`` block closes
	        # the file as soon as its lines have been parsed.
	        with open(f) as survey:
	            fields = [
	                [(choice, int(freq))
	                 for (choice, freq)
	                 in (item.split(',') for item in line.strip().split(':'))]
	                for line in survey
	            ]
	        # WARNING: fields[x][5] always corresponds to the # that did not respond

	        # NOTE(review): everything below except wt_avg_challs is computed but
	        # not included in the printed output -- presumably kept so the output
	        # column can be swapped; confirm before removing.
	        proportion_why = (sum(freq for _, freq in fields[9][0:3])
	                          / sum(count for _, count in fields[9][:5]))

	        wt_avg_rating = mean(fields[8])
	        wt_avg_challs = mean(fields[7])
	        wt_avg_learned = mean(fields[6])

	        pvar_rating = pvar(fields[8])

	        # First digit anywhere in the path is taken as the class level.
	        class_level = int(re.search('[0-9]', f).group())

	        print('%f,%f' % (class_level, wt_avg_challs))

	# Run the report only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()