Skip to content

Instantly share code, notes, and snippets.

@n8henrie
Last active January 13, 2023 23:26
Show Gist options
  • Save n8henrie/cd61e02d876ef2f23e1daaab5266436a to your computer and use it in GitHub Desktop.
Save n8henrie/cd61e02d876ef2f23e1daaab5266436a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
search_bad_logins.py :: Compares a LastPass export to your Bitwarden vault.
Python >=3.10, no third party Python libraries / dependencies.
Outputs BW logins that may have been compromised in the recent LastPass hack
based on matching domain and password.
It would probably make sense to cast an even wider net by using something like
[`xsv`](https://github.com/BurntSushi/xsv) to just search for a potentially
compromised password *anywhere* in your vault, but this should help point out
some "this definitely needs to be changed" logins.
Prior to running, you'll need to:
1. Install the Bitwarden CLI
2. Log in with `bw login`
3. Export the session variable:
- Copy and paste the line starting with `$ export BW_SESSION=`
- You'll need to remove the leading `$`
- I highly recommend you insert a leading space where the `$` was,
which should keep this command out of your bash history
4. Run this script from the same terminal you used for steps 1-3; run
`bw list items` to make sure your login is working first
Usage:
./search_bad_logins.py --yes-i-read-the-script /path/to/lastpass-export.csv
"""
import argparse
import csv
import io
import json
import os
import subprocess
import sys
import urllib.request
from collections import defaultdict
from functools import lru_cache
from urllib.parse import urlparse
def _cli(show_usage: bool = False) -> argparse.Namespace:
    """Build the argument parser and parse the command line.

    Args:
        show_usage: When true, do not parse anything; print the full help
            text to stderr and exit with status 1. This is used to refuse to
            run, so it must not look like a successful invocation and must
            not pollute stdout, which carries the report.

    Returns:
        The parsed arguments. `lastpass_csv` is an already opened,
        line-buffered text file handle.

    Raises:
        SystemExit: When `show_usage` is set, or when argparse itself exits
            (`--help`, invalid arguments).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("lastpass_csv", type=argparse.FileType("r", 1))
    parser.add_argument(
        "--yes-i-read-the-script",
        action="store_true",
        help="""
        This tool is reading both your lastpass and bitwarden vault data.
        You *really* should read the source code before you run it. Will
        not run without this flag set.
        """,
    )
    parser.add_argument(
        "--strip-subdomains",
        action="store_true",
        help="""
        Try to match based on TLD alone. This option will require
        downloading the list of TLDs from its GitHub mirror. Doesn't cache
        the response (yet).
        """,
    )
    parser.add_argument(
        "--passwords-anywhere",
        action="store_true",
        help="""
        Report any Bitwarden item that has a matching password anywhere in
        Lastpass. Skips matching on URI or username, so doesn't output
        usernames, and the shown URI is for the relevant Bitwarden item.
        """,
    )

    # The three ways of choosing an output format cannot be combined.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--output-format",
        default="csv",
        choices=["csv", "json"],
        help="Output format",
    )
    group.add_argument(
        "--json",
        action="store_true",
        help="Shortcut for `--output-format=json`",
    )
    group.add_argument(
        "--csv",
        action="store_true",
        help="Shortcut for `--output-format=csv`",
    )

    if show_usage:
        # `parse_args(["--help"])` would exit with status 0 and print to
        # stdout; a refusal to run has to be reported as a failure instead.
        parser.print_help(sys.stderr)
        parser.exit(status=1)

    return parser.parse_args()
@lru_cache
def get_tlds() -> set[str]:
    """Download the public suffix list and return its entries as a set.

    The list is read from the GitHub mirror rather than from
    https://www.publicsuffix.org to save that site some bandwidth. The
    result is only memoised in memory for the current process, so every run
    of the script downloads the list once; please be kind.

    Raises:
        Exception: If the response status is not 200.
    """
    # Upstream location, for reference:
    # https://www.publicsuffix.org/list/public_suffix_list.dat
    mirror_url = (
        "https://raw.githubusercontent.com/publicsuffix/list/master/"
        "public_suffix_list.dat"
    )
    with urllib.request.urlopen(mirror_url) as response:
        if response.status != 200:
            raise Exception("Unable to get the TLD list")
        body = response.read().decode("utf8")

    suffixes = set()
    for raw_line in body.splitlines():
        entry = raw_line.strip()
        # Drop blank lines and lines that start with the `//` comment marker.
        if entry and not raw_line.startswith("//"):
            suffixes.add(entry)
    return suffixes
def strip_subdomains(uri: str) -> str:
    """Reduce a host name to its registrable domain, on a best effort basis.

    The host is rebuilt label by label from the right; the first candidate
    that is not an entry of the public suffix list is returned. This lets
    `shop.foo.com` in LastPass match `foo.com` in Bitwarden. If every
    candidate is itself a listed suffix, the stripped input is returned
    unchanged.

    >>> strip_subdomains("foo.bar.www.n8henrie.com")
    'n8henrie.com'
    >>> strip_subdomains("some.where.uk")
    'where.uk'
    >>> strip_subdomains("some.where.co.uk")
    'where.co.uk'
    """
    cleaned = uri.strip()
    labels = cleaned.split(".")
    known_suffixes = get_tlds()

    candidate = ""
    while labels:
        # Prepend the next label; rstrip removes the dot left over from the
        # initially empty candidate.
        candidate = f"{labels.pop()}.{candidate}".rstrip(".")
        if candidate not in known_suffixes:
            return candidate
    return cleaned
# bw list items |
# jq '.[10].login.uris[0].uri, .[10].login.username, .[10].login.password'
def get_bw_items(
    strip_subs: bool = False,
) -> dict[str, list[tuple[str, str]]]:
    """Return a mapping of Bitwarden logins read from `bw list items`.

    Format: { domain: [(username, password)] }

    A list of tuples is used per domain so that several logins for the same
    domain are all kept instead of overwriting each other. Missing usernames
    and passwords are stored as "".

    The domain is the `netloc` that `urlparse` extracts from each URI of an
    item; items without a login or without URIs are skipped.
    NOTE(review): `urlparse` yields an empty netloc for URIs without a
    `scheme://` prefix, so such entries all end up under the "" key.

    Args:
        strip_subs: Reduce every domain with `strip_subdomains` first.

    Raises:
        subprocess.CalledProcessError: If `bw` exits with a non-zero status.
    """
    cmd = subprocess.run(
        ["bw", "list", "items", "--nointeraction"], capture_output=True
    )
    # Any non-zero status is a failure; negative values mean the process was
    # terminated by a signal. Diagnostics go to stderr so that stdout only
    # ever carries the report.
    if cmd.returncode != 0:
        print(cmd.stderr.decode(errors="replace"), file=sys.stderr)
        cmd.check_returncode()

    bw_items = defaultdict(list)
    for item in json.loads(cmd.stdout):
        if not (login := item.get("login")):
            continue
        if not (uris := login.get("uris")):
            continue

        # The credentials are the same for every URI of the item; replace
        # `None`s with "".
        credentials = (
            login.get("username") or "",
            login.get("password") or "",
        )
        for uri_entry in uris:
            domain = urlparse(uri_entry.get("uri") or "").netloc
            if strip_subs:
                domain = strip_subdomains(domain)
            bw_items[domain].append(credentials)
    return dict(bw_items)
def get_lp_items(
    filehandle, strip_subs: bool = False
) -> list[tuple[str, str, str]]:
    """Return the list of LastPass logins read from a CSV export.

    Format: [(uri, username, password)]

    The CSV must have `url`, `username` and `password` columns. The `uri`
    element is the `netloc` that `urlparse` extracts from the `url` column.

    Args:
        filehandle: Open text file (or file-like object) with the export.
            It is always closed before this function returns or raises.
        strip_subs: Reduce every domain with `strip_subdomains` first.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    lp_items = []
    try:
        for row in csv.DictReader(filehandle):
            uri = urlparse(row["url"]).netloc
            if strip_subs:
                uri = strip_subdomains(uri)
            lp_items.append((uri, row["username"], row["password"]))
    finally:
        # Close the export even when a malformed row aborts the loop.
        filehandle.close()
    return lp_items
def _compare(
lp_items,
bw_items,
passwords_anywhere: bool = False,
) -> dict[str, dict[str, dict[str, str]]]:
matches = dict()
if passwords_anywhere:
lp_pws: set[str] = {pw for pw in lp_items}
for _, _, lp_pw in lp_pws:
for uri, login in bw_items.items():
for bw_user, bw_pw in login:
if bw_pw == lp_pw and lp_pw.strip():
matches[uri] = {
"password": lp_pw,
"lp_user": None,
"bw_user": None,
}
return matches
for (uri, lp_user, lp_pw) in lp_items:
if not (bw_logins := bw_items.get(uri)):
continue
for (bw_user, bw_pw) in bw_logins:
if lp_pw == bw_pw:
matches[uri] = {
"password": lp_pw,
"lp_user": lp_user,
"bw_user": bw_user,
}
return matches
def _output(
    matches,
    fmt: str,
) -> str:
    """Render the matches as text in the requested format.

    Args:
        matches: Mapping of URI to a dict with the keys `password`,
            `lp_user` and `bw_user`.
        fmt: Either "json" (pretty-printed, keys sorted) or "csv" (header
            row followed by one row per URI).

    Raises:
        ValueError: If `fmt` is neither "json" nor "csv".
    """
    if fmt == "json":
        return json.dumps(matches, sort_keys=True, indent=4)
    if fmt != "csv":
        raise ValueError(f"Unknown output format: {fmt}")

    buffer = io.StringIO()
    writer = csv.writer(buffer, lineterminator=os.linesep)
    writer.writerow(["uri", "lp_user", "bw_user", "password"])
    for uri, found in matches.items():
        password, lp_user, bw_user = (
            found["password"],
            found["lp_user"],
            found["bw_user"],
        )
        writer.writerow([uri, lp_user, bw_user, password])
    # Strip the terminator of the last row; the caller prints the result.
    return buffer.getvalue().strip()
def main():
    """Read both vaults, compare them, and print the matching logins."""
    args = _cli()
    if not args.yes_i_read_the_script:
        # Refuse to continue without the acknowledgement flag: show the
        # usage text and stop.
        _cli(show_usage=True)
        sys.exit(1)

    strip_subs = args.strip_subdomains
    lastpass_logins = get_lp_items(args.lastpass_csv, strip_subs=strip_subs)
    bitwarden_logins = get_bw_items(strip_subs=strip_subs)
    found = _compare(
        lp_items=lastpass_logins,
        bw_items=bitwarden_logins,
        passwords_anywhere=args.passwords_anywhere,
    )

    # The `--csv` / `--json` shortcuts take precedence over the value of
    # `--output-format`.
    if args.csv:
        fmt = "csv"
    elif args.json:
        fmt = "json"
    else:
        fmt = args.output_format
    print(_output(found, fmt=fmt))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment