Skip to content

Instantly share code, notes, and snippets.

@sxlijin
Created December 7, 2016 09:10
Show Gist options
  • Save sxlijin/cec47bf76e9ad0011cb9fb43fdd155e4 to your computer and use it in GitHub Desktop.
Save sxlijin/cec47bf76e9ad0011cb9fb43fdd155e4 to your computer and use it in GitHub Desktop.
Scrapes the VOICE survey data submitted by students.
#!/usr/bin/env bash
# Credentials used for the login request. They may be supplied through the
# environment (VUNETID / VUNETPW); otherwise edit the placeholders below.
# NOTE(review): the password still ends up on curl's command line, where it is
# visible to other local users via `ps` -- acceptable for a one-off scrape only.
VUNETID="${VUNETID:-put your vunetid here}"
VUNETPW="${VUNETPW:-put your vunetid password here}"

# login
# The user name and password are sent with --data-urlencode so that characters
# such as '&', '+', '%' or '=' inside them do not corrupt the form body.
# The response body is not needed, so it is discarded.
if ! curl https://www.sds.vanderbilt.edu/perl/voiceview.pl \
  -X POST \
  -d VSASM_ASVBlock=425457605464733D246C743F77706A6466776A66782F716D3D2477743F425457604E6675693D246C743F4D4D4542513D2477743F42545760516275693D246C743F7878782F7465742F77626F656673636A6D752F666576307166736D3077706A6466776A66782F716D3D2477743F425457605476634E6675693D246C743F45464742564D553D2477743F42545760544A513D246C743F31423533453132473D2477743F425457604271713D246C743F57504A44463D2477743F425457605477733D246C743F54455432 \
  --data-urlencode "VSASM_user=${VUNETID}" \
  --data-urlencode "VSASM_pw=${VUNETPW}" \
  -d VSASM_Login=Login \
  > /dev/null; then
  # Nothing below can work if the login request itself could not be sent.
  printf 'error: login request to voiceview.pl failed\n' >&2
  exit 1
fi
# get information for specific areas
#######################################
# POST a form to the VOICE viewer endpoint.
# Every request carries the fixed VSASM_BLOCK field; the caller's arguments
# are appended to the curl command line (callers pass `-d name=value` pairs).
# Arguments: any additional curl arguments
# Outputs:   whatever curl writes to stdout (the response body)
# Returns:   curl's exit status
#######################################
post_to_voice() {
  # "$@" (quoted) forwards each argument exactly as given; the unquoted form
  # would re-split arguments on whitespace and expand glob characters.
  curl https://www.sds.vanderbilt.edu/perl/voiceview.pl \
    -X POST \
    -d VSASM_BLOCK=567466734A65666F753D246C743F6D6A6B6A6F743D2477743F4C667A324C3D246C743F3537353D2477743F544A513D246C743F3142353345313247 \
    "$@"
}
# Submit the user-type / terms-accepted form fields. Only the request itself
# matters here, so the response body is thrown away.
terms_form=(-d VoiceViewUserType=ActiveStudent -d TermsAccepted=OK)
post_to_voice "${terms_form[@]}" > /dev/null
# Rebuild dept-list.txt from scratch: for every school, request its page and
# keep each "<SCHOOL>:<UPPERCASE LETTERS>" token found in the response,
# one per line, in school order.
for school in VUAS VUBLR VUENG; do
  post_to_voice -d "ViewSchool=${school}" | grep -o "${school}:[A-Z]\+"
done > dept-list.txt
#######################################
# Extract course codes from an area page.
# Arguments: $1 - area prefix (SCHOOL:DEPT) prepended to every code
# Inputs:    the page HTML on stdin
# Outputs:   one "SCHOOL:DEPT:CODE" line per code, on stdout
#######################################
extract_course_codes() {
  local area=$1
  # 1. keep everything from the first line mentioning ViewCourse onwards
  # 2. drop the first two of those lines
  # 3. stop at (and then drop) the first following line mentioning SELECT
  # 4. every ">" followed by capitals/digits is taken to be a course code
  # The area is interpolated into the sed script; it only ever holds the
  # "[A-Z]+:[A-Z]+" tokens written to dept-list.txt, so no escaping is needed.
  sed -n -e '/ViewCourse/,$p' \
    | tail -n +3 \
    | sed '/SELECT/q' \
    | sed '$d' \
    | grep -o '>[A-Z0-9]\+' \
    | sed "s/>/${area}:/"
}

# Rebuild course-list.txt: for every SCHOOL:DEPT line in dept-list.txt, request
# that area's page and append its course codes.
: > course-list.txt
while IFS=":" read -r school dept; do
  area="${school}:${dept}"
  post_to_voice -d "ViewSchool=${school}" -d "ViewArea=${area}" \
    | extract_course_codes "${area}" \
    >> course-list.txt
done < dept-list.txt
#######################################
# Extract the link targets of "Scores" links from a course page.
# Inputs:  the page HTML on stdin
# Outputs: one URL per line on stdout, with HTML-escaped ampersands decoded
#######################################
extract_score_links() {
  # Only lines mentioning "Scores" are considered; from those, each
  # A HREF="..." attribute is isolated and reduced to its bare value.
  # The final expression turns "&amp;" back into "&" so the result is a usable
  # URL (the previous "s/&/\&/g" replaced "&" with itself and decoded nothing).
  grep Scores \
    | grep -o 'A HREF="[^"]\+"' \
    | sed -e 's/A HREF=//' -e 's/"//g' -e 's/&amp;/\&/g'
}

# Rebuild score-links-list.txt: for every SCHOOL:DEPT:COURSE line in
# course-list.txt, request that course's page and append its score links.
: > score-links-list.txt
while IFS=":" read -r school dept course; do
  area="${school}:${dept}"
  post_to_voice \
    -d "ViewSchool=${school}" \
    -d "ViewArea=${area}" \
    -d "ViewCourse=${course}" \
    | extract_score_links \
    >> score-links-list.txt
done < course-list.txt
# To scrape more than the courses of the fall 2015 semester, replace the
# "grep fall2015 score-links-list.txt" below with "cat score-links-list.txt"
# and adjust the "cut" field range inside score_file_name.

#######################################
# Derive the output file name for a score URL.
# Arguments: $1 - score page URL
# Outputs:   comma-separated fields 2-6 of the text matched by "ScoreTgt=...&",
#            joined with "-" and suffixed with ".txt"; nothing at all when the
#            URL contains no such match
#######################################
score_file_name() {
  local url=$1
  printf '%s\n' "${url}" \
    | grep -o 'ScoreTgt=.*&' \
    | cut -d, -f2-6 \
    | sed -e 's/,/-/g' -e 's/$/.txt/'
}

# The per-course files are written below downloads/; create it up front so the
# output redirections cannot fail on a fresh checkout.
mkdir -p downloads

grep fall2015 score-links-list.txt \
  | while read -r url; do
    course_file="$(score_file_name "${url}")"
    if [[ -z "${course_file}" ]]; then
      # Without a name the redirect would target the directory itself.
      printf 'warning: no ScoreTgt parameter in %s, skipping\n' "${url}" >&2
      continue
    fi
    # Break the page apart at every "valign=center", reduce each piece to a
    # single token, drop the pieces containing TR/TD as well as the first and
    # last remaining piece, then join the tokens pairwise with "," and six
    # pairs per output line with ":".
    curl "${url}" \
      | grep "valign=center" \
      | sed "s/valign=center/\n/g" \
      | sed "s/.*nowrap>//;s/ .*//" \
      | grep -v "T[RD]" \
      | tail -n +2 \
      | sed '$d' \
      | sed "s/.*>//g" \
      | paste -d"," - - \
      | paste -d":" - - - - - - \
      > "downloads/${course_file}"
  done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment