Created
November 28, 2017 01:21
-
-
Save jasoncodes/72eb6c4de66a4b2b436f7df03f031a49 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bootstrap: declare this script's gem dependencies inline via bundler/inline
# so it can run without a separate Gemfile.
require 'bundler'
Bundler.configure
require 'bundler/inline'

gemfile do
  # Use the TLS endpoint; a plain-http source allows gem downloads to be
  # tampered with in transit.
  source 'https://rubygems.org'

  gem 'activesupport', require: 'active_support/all'
  gem 'aws-sdk-s3'
  gem 'parallel'
  gem 'curb'
  gem 'retryable'
end

require 'shellwords'
# Runtime configuration, read once at start-up and shared through globals.

# Heroku app whose Papertrail archives are copied. ENV.fetch raises KeyError
# when the variable is unset.
$app_name = ENV.fetch('APP_NAME')

# Read the Papertrail API token from the app's Heroku config via the heroku CLI.
# The app name is shell-escaped because it is interpolated into a shell command.
$papertrail_token = `heroku config:get PAPERTRAIL_API_TOKEN -a #{Shellwords.escape $app_name}`.chomp
raise "heroku config:get PAPERTRAIL_API_TOKEN failed for #{$app_name} (#{$?})" unless $?.success?
raise "PAPERTRAIL_API_TOKEN is blank for #{$app_name}" unless $papertrail_token.present?

# S3 client and destination bucket used for the uploaded archives.
$s3 = Aws::S3::Client.new(
  access_key_id: ENV.fetch('S3_ACCESS_KEY_ID'),
  secret_access_key: ENV.fetch('S3_SECRET_ACCESS_KEY'),
  region: 'us-east-1',
)
$s3_bucket = ENV.fetch('S3_BUCKET')
# Raised when Papertrail answers 429 (rate limited), so that the download can
# be retried.
class RateLimitError < RuntimeError
end
# Copies one Papertrail log archive for $app_name into S3, skipping archives
# that are already stored there.
#
# Reads the globals $app_name, $papertrail_token, $s3 and $s3_bucket.
#
# @param date [Date] day the archive covers (anything responding to strftime)
# @param hour [Integer, nil] hour of the day for an hourly archive, or nil for
#   the whole-day archive
# @return [Boolean] true when the archive is in S3 (already present or just
#   uploaded); false when Papertrail answers 404 for it
# @raise [RateLimitError] if Papertrail still answers 429 after all retries
# @raise [RuntimeError] on any other non-200 status, or when the body does not
#   start with the gzip magic bytes
def archive(date, hour)
  date_name = date.strftime('%Y-%m-%d')
  # Hourly archives are named "<date>-<HH>"; 0 is truthy in Ruby, so hour 0
  # still takes the hourly branch.
  archive_name = hour ? format('%s-%02d', date_name, hour) : date_name
  s3_key = "#{$app_name}/#{date_name}/#{archive_name}.tsv.gz"
  log = ->(message) { puts "#{$app_name} #{archive_name}: #{message}" }

  log.call('Checking')
  already_archived = begin
    $s3.head_object(bucket: $s3_bucket, key: s3_key)
    true
  rescue Aws::S3::Errors::NotFound
    false
  end
  if already_archived
    log.call('Already archived')
    return true
  end

  log.call('Downloading')
  papertrail_url = "https://papertrailapp.com/api/v1/archives/#{archive_name}/download"
  response = nil
  # Only rate limiting is retried; every other outcome is handled below.
  Retryable.retryable(tries: 5, sleep: 5, on: RateLimitError) do
    response = Curl.get(papertrail_url) do |http|
      http.follow_location = true
      http.headers['X-Papertrail-Token'] = $papertrail_token
    end
    if response.response_code == 429
      log.call('Rate limited')
      raise RateLimitError
    end
  end

  status = response.response_code
  if status == 404
    log.call('Not found')
    return false
  end

  # to_s guards against a missing body so the checks below fail with a clear
  # message instead of a NoMethodError/TypeError.
  body = response.body.to_s
  raise "unexpected HTTP #{status} from Papertrail: #{body}" unless status == 200
  # Compare raw bytes so the check does not depend on the encoding the HTTP
  # client tagged the body with.
  raise 'expected gzip data' unless body.b.start_with?("\x1F\x8B".b)

  log.call('Uploading')
  $s3.put_object(bucket: $s3_bucket, key: s3_key, body: body)
  log.call('Done')
  true
end
# Walk backwards from today through the previous 385 days, archiving on five
# threads. For each day the whole-day archive is tried first; when archive
# returns false for it (Papertrail answered 404), fall back to hourly archives
# and stop at the first hour that is also missing.
today = Date.today # read once so both ends of the range use the same day
Parallel.each today.downto(today - 385), in_threads: 5 do |date|
  next if archive(date, nil)

  (0...24).each do |hour|
    break unless archive(date, hour)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment