Last active
August 29, 2015 14:02
-
-
Save amirkdv/634d5ac2f02d54652d78 to your computer and use it in GitHub Desktop.
Login, click through, and get the data you need from an HTTPS site
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# Use Mechanize to automatically login, click through, and fetch the data you
# need from an HTTPS site. The following works for the McGill admission portal.
#
# Usage, followed by sample output as recorded by the original author (one
# line is printed per matching term link):
#   ruby mechanic.rb
#   # Ready for Review
#   # Ready for Review
require 'rubygems'
require 'mechanize'

# --- configuration ----------------------------------------------------------
site = 'https://mcgill.ca/minerva'
login_form_action = '/pban1/twbkwbis.P_ValLogin'
# Credentials may be supplied through the environment so that they do not have
# to be written into this file; the placeholders remain the defaults.
credentials = [ {:field => 'sid', :value => ENV.fetch('MINERVA_SID', 'MY_STUDENTID')},
                {:field => 'PIN', :value => ENV.fetch('MINERVA_PIN', 'MY_PASSWORD')} ]
# Link texts to follow, in order, after logging in.
click_chain = [ 'Applicant', 'Admission Application and Documentation Status' ]
# Text of the links (one per application) whose status should be printed.
term_link_text = 'Fall 2014'
logout_link_text = 'EXIT'

# First pattern isolates the "Status:" label cell plus the cell that follows
# it; second pattern captures the text of that following cell.
status_row_pattern = /Status:<\/TD>\n<TD[^\/]*>[^\/]*<\/TD>/
status_value_pattern = />([^<]*)<\/TD>/

mech = Mechanize.new do |agent|
  # set a proper user agent
  # agent.user_agent = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:29.0) Gecko/20100101 Firefox/29.0'
  agent.user_agent_alias = 'Linux Firefox' # see Mechanize::AGENT_ALIASES
  agent.follow_meta_refresh = true
end

page = mech.get(site)

# login
# form_with returns nil when no form matches, so fail with a readable message
# instead of a NoMethodError on nil.
login_form = page.form_with(:action => login_form_action)
raise "login form with action '#{login_form_action}' not found at #{site}" unless login_form
credentials.each { |c| login_form[c[:field]] = c[:value] }
page = login_form.click_button

begin
  # click through
  click_chain.each do |text|
    link = page.link_with(:text => text)
    raise "link '#{text}' not found (did the login succeed?)" unless link
    page = link.click
  end

  # do your thing
  page.links_with(:text => term_link_text).each do |link|
    body = link.click.body
    status_row = body[status_row_pattern]
    status = status_row && status_row[status_value_pattern, 1]
    if status
      puts status
    else
      # Keep going so the remaining links are still reported.
      warn "status not found on page for link '#{link.text}'"
    end
  end
ensure
  # logout
  # Runs even when a step above raised, so the session is not left open.
  logout_link = page.link_with(:text => logout_link_text) if page.respond_to?(:link_with)
  if logout_link
    page = logout_link.click
  else
    warn "logout link '#{logout_link_text}' not found; session may still be open"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment