Last active
March 22, 2024 20:47
-
-
Save ivy/e865eca249763e2cd212758c305bccf9 to your computer and use it in GitHub Desktop.
Convert Chase credit card statements from PDF to CSV. Useful for importing old transactions into QuickBooks Online/Self-Employed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# chasepdf2csv -- Convert Chase credit card statements from PDF to CSV. Written
# to easily import older statements into QuickBooks Online/Self-Employed. Chase
# unfortunately only offers statements up to 6 months in the past, making it a
# huge chore to synchronize past transactions.
#
# How to Use
# ----------
# This script requires Ruby >2.0.0 and pdftotext. Copy this script somewhere and
# make it executable. Run it like any other command.
#
# ISC License
# -----------
# Copyright (c) 2018-2020 Ivy Evans <[email protected]>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
require 'csv' | |
require 'optparse' | |
# Prints +msg+ on the process's standard error stream, prefixed with
# "error: ". Execution continues afterwards.
#
# @param msg [#to_s] text to report
# @return [nil]
def error(msg)
  line = "error: #{msg}"
  STDERR.puts(line)
end
# Reports +msg+ through #error and then terminates the process with exit
# status 1. Control never returns to the caller.
#
# @param msg [#to_s] text to report before exiting
def fatal(msg)
  error(msg)
  Kernel.exit(1)
end
# A Chase credit card statement: the list of transactions scraped from the
# text that pdftotext extracts from a statement PDF.
class Statement
  # Matches the "Payment Due Date" header and captures its two-digit month,
  # day and year. The year (and month) are used to date the transactions,
  # which are printed on the statement as MM/DD only.
  DUE_DATE_PATTERN = %r{
    Payment\s+Due\s+Date:?
    \s+
    (?<month>\d{2})/(?<day>\d{2})/(?<year>\d{2})
  }x

  # A single line item of a statement.
  class Transaction
    # Regex for matching transactions in a Chase credit statement.
    #
    # Edge Case: Amazon orders
    #
    #   01/23 AMAZON MKTPLACE PMTS AMZN.COM/BILL WA 12.34\n
    #   Order Number 123-4567890-1234567\n
    #
    # Edge Case: Rewards points
    #
    #   01/23 AMAZON MARKETPLACE AMZN.COM/BILLWA 4.56 7,890
    #
    LINE_ITEM_PATTERN = %r{
      (?<date>\d{2}/\d{2})
      \s+
      (?<description>.+)
      \s+
      (?<amount>-?[\d,]+\.\d{2})
      (
        [ ]
        (?<points>[1-9][\d,]+)?
        |
        \s*
        Order\s+Number\s+
        (?<order_num>[^\s]+)
      )?
    }x

    # Builds one Transaction per LINE_ITEM_PATTERN match found in +output+.
    #
    # @param output [String] text extracted from the statement
    # @param year [String] year of the statement's payment due date
    # @param due_month [String, Integer, nil] month of the payment due date;
    #   when given, transactions whose month is later than it are dated in
    #   the previous year (see #initialize). Omit to keep +year+ for all.
    # @return [Array<Transaction>]
    def self.scan(output, year, due_month = nil)
      # to_enum(:scan) lets the block read the full MatchData (and thus the
      # named captures) of each match through Regexp.last_match.
      output.to_enum(:scan, LINE_ITEM_PATTERN).map do
        new(Regexp.last_match, year, due_month)
      end
    end

    # @param data [MatchData, Hash] must answer +[]+ for :date, :description,
    #   :amount, :points and :order_num
    # @param year [String] year of the statement's payment due date
    # @param due_month [String, Integer, nil] month of the payment due date.
    #   A transaction always precedes its statement's due date, so a
    #   transaction month greater than the due month (e.g. a December purchase
    #   on a statement due in January) belongs to the previous year.
    def initialize(data, year, due_month = nil)
      @date = "#{data[:date]}/#{year_for(data[:date], year, due_month)}"
      @description = data[:description]
      @amount = data[:amount]
      @points = data[:points]
      @order_num = data[:order_num]
    end

    attr_reader :date, :amount, :points, :order_num

    # Truthy (the captured string) when the corresponding field was present.
    alias rewards? points
    alias order_num? order_num

    # @return [String] the description, with " #<order number>" appended when
    #   an order number was captured
    def description
      order_num? ? "#{@description} ##{order_num}" : @description
    end

    # @return [Hash] the transaction's fields keyed by symbol
    def to_hash
      {
        date: date,
        description: description,
        amount: amount,
        points: points,
        order_num: order_num,
      }
    end
    alias to_h to_hash

    private

    # Picks the year for a "MM/DD" +date+: +year+ itself, or the year before
    # it when the transaction month is later than +due_month+. The result
    # keeps the width of +year+; two-digit years wrap from "00" to "99".
    def year_for(date, year, due_month)
      return year if due_month.nil?
      return year unless date[0, 2].to_i > due_month.to_i

      digits = year.to_s
      previous = digits.to_i - 1
      previous %= 100 if digits.length <= 2
      format("%0#{digits.length}d", previous)
    end
  end

  attr_reader :line_items

  # Runs pdftotext on +path+ and parses the transactions out of its output.
  # Calls +fatal+ (which terminates the process) when pdftotext fails or no
  # payment due date can be found in the extracted text.
  #
  # @param path [String] path of the statement PDF
  # @return [Statement]
  def self.parse(path)
    # The argv form bypasses the shell, so paths containing spaces or shell
    # metacharacters are passed to pdftotext verbatim.
    output = IO.popen(['pdftotext', '-raw', path.to_s, '-'], &:read)
    unless $?.success?
      fatal "pdftotext: failed to parse #{path} (exit code #{$?.exitstatus})"
    end

    m = output.match(DUE_DATE_PATTERN)
    fatal "parse error: could not match due date in #{path}" unless m

    new(Transaction.scan(output, m[:year], m[:month]))
  end

  # @param line_items [Array<Transaction>]
  def initialize(line_items)
    @line_items = line_items
  end

  # Yields each line item to the given block.
  def each_line_item(&block)
    line_items.each(&block)
  end
end
# Command-line entry point: converts every statement PDF named in +args+ to
# CSV rows (Date, Description, Amount) written to STDOUT, or to the file given
# with -o/--output. Line items that carry rewards points are skipped.
#
# Calls +fatal+ (which terminates the process) when pdftotext is not
# installed, an option is invalid, or no input file is given.
#
# @param args [Array<String>] command-line arguments; recognised options are
#   removed from this array in place
def main(args = ARGV)
  outfile = nil

  unless system('command -v pdftotext >/dev/null 2>&1')
    # fatal already prefixes the message with "error: ".
    fatal "pdftotext not found!"
  end

  output_path = nil
  options = OptionParser.new do |opts|
    opts.banner = "Usage: #{$0} [options] FILE..."
    opts.on('-o', '--output=FILE', 'Output to file') do |path|
      # Only remember the path here; the file is opened once the arguments
      # have been validated, so a usage error does not truncate it.
      output_path = path
    end
    opts.on('-h', '--help', 'Show this message') do
      puts opts
      exit
    end
  end

  begin
    options.parse!(args)
  rescue OptionParser::ParseError => e
    fatal e.message
  end

  # Use the arguments that were actually passed in, not the global ARGV.
  fatal "no files specified" if args.empty?

  outfile = output_path ? File.open(output_path, 'w') : STDOUT
  csv = CSV.new(
    outfile, headers: %w[Date Description Amount], write_headers: true
  )
  args.each do |file|
    Statement.parse(file).each_line_item do |line_item|
      next if line_item.rewards?

      csv << [line_item.date, line_item.description, line_item.amount]
    end
  end
ensure
  # Flush and release the output file; STDOUT is left open for the caller.
  outfile.close if outfile && !outfile.equal?(STDOUT)
end
# Run the converter only when this file is executed directly, not when it is
# loaded from another script.
main if $PROGRAM_NAME == __FILE__
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Unfortunately, this method only allows up to 24 months or ~1500 transactions max. If you need 7 years of transactions, which Chase keeps, they tell you to download the PDF statements.