Last active
January 13, 2023 23:26
-
-
Save n8henrie/cd61e02d876ef2f23e1daaab5266436a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
search_bad_logins.py :: Compares a LastPass export to your Bitwarden vault.

Python >=3.10, no third party Python libraries / dependencies.

Outputs BW logins that may have been compromised in the recent LastPass hack
based on matching domain and password.

It would probably make sense to cast an even wider net by using something like
[`xsv`](https://github.com/BurntSushi/xsv) to just search for a potentially
compromised password *anywhere* in your vault, but this should help point out
some "this definitely needs to be changed" logins.

Prior to running, you'll need to:

1. Install the Bitwarden CLI
2. Log in with `bw login`
3. Export the session variable:
    - Copy and paste the line starting with `$ export BW_SESSION=`
    - You'll need to remove the leading `$`
    - I highly recommend you insert a leading space where the `$` was,
      which should keep this command out of your bash history
4. Run this script from the same terminal you used for steps 1-3; run
   `bw list items` to make sure your login is working first

Usage:
    ./search_bad_logins.py --yes-i-read-the-script /path/to/lastpass-export.csv
""" | |
import argparse | |
import csv | |
import io | |
import json | |
import os | |
import subprocess | |
import sys | |
import urllib.request | |
from collections import defaultdict | |
from functools import lru_cache | |
from urllib.parse import urlparse | |
def _cli(show_usage: bool = False) -> argparse.Namespace:
    """Build the argument parser and parse the command line.

    Args:
        show_usage: If true, print the full help text and exit with status 1
            instead of parsing `sys.argv`. Meant for callers that reject the
            invocation (e.g. the acknowledgement flag is missing) and want
            the shell to see a failure rather than the status 0 that a real
            `--help` produces.

    Returns:
        The parsed arguments. `lastpass_csv` is an already-open text file
        handle, opened by `argparse.FileType` while parsing.

    Raises:
        SystemExit: With status 1 when `show_usage` is true; also raised by
            argparse itself for `--help` (status 0) and for invalid
            arguments (status 2).
    """
    parser = argparse.ArgumentParser()
    # Line-buffered text handle; the file is opened as soon as the arguments
    # are parsed, before any other validation happens.
    parser.add_argument("lastpass_csv", type=argparse.FileType("r", 1))
    parser.add_argument(
        "--yes-i-read-the-script",
        action="store_true",
        help="""
        This tool is reading both your lastpass and bitwarden vault data.
        You *really* should read the source code before you run it. Will
        not run without this flag set.
        """,
    )
    parser.add_argument(
        "--strip-subdomains",
        action="store_true",
        help="""
        Try to match based on TLD alone. This option will require
        downloading the list of TLDs from its GitHub mirror. Doesn't cache
        the response (yet).
        """,
    )
    parser.add_argument(
        "--passwords-anywhere",
        action="store_true",
        help="""
        Report any Bitwarden item that has a matching password anywhere in
        Lastpass. Skips matching on URI or username, so doesn't output
        usernames, and the shown URI is for the relevant Bitwarden item.
        """,
    )

    # Only one way of selecting the output format may be used per run.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--output-format",
        default="csv",
        choices=["csv", "json"],
        help="Output format",
    )
    group.add_argument(
        "--json",
        action="store_true",
        help="Shortcut for `--output-format=json`",
    )
    group.add_argument(
        "--csv",
        action="store_true",
        help="Shortcut for `--output-format=csv`",
    )

    if show_usage:
        # Parsing ["--help"] would exit with status 0 and hide the failure
        # from the shell, so print the help explicitly and exit non-zero.
        parser.print_help()
        parser.exit(1)

    return parser.parse_args()
@lru_cache
def get_tlds() -> set[str]:
    """Return the set of public suffixes ("TLDs") from the Public Suffix List.

    Downloads the list from its GitHub mirror to save publicsuffix.org some
    bandwidth. Thanks to `lru_cache` the download happens at most once per
    process, but nothing is cached on disk, so every run of the script
    fetches the list again -- please be kind.

    Returns:
        Every non-blank, non-comment line of the list, stripped of
        surrounding whitespace. Wildcard (`*.`) and exception (`!`) rules
        are returned verbatim, not interpreted.

    Raises:
        RuntimeError: If the server answers with a status other than 200.
        urllib.error.URLError: If the request itself fails.
    """
    # Canonical source:
    # https://www.publicsuffix.org/list/public_suffix_list.dat
    url = (
        "https://raw.githubusercontent.com/publicsuffix/list/master/"
        "public_suffix_list.dat"
    )
    # Without a timeout a stalled connection would hang the script forever.
    with urllib.request.urlopen(url, timeout=30) as resp:
        if resp.status != 200:
            raise RuntimeError("Unable to get the TLD list")
        raw_tlds = resp.read().decode("utf8")

    suffixes = set()
    for line in raw_tlds.splitlines():
        entry = line.strip()
        # Test the stripped text so indented `//` comments are skipped too.
        if entry and not entry.startswith("//"):
            suffixes.add(entry)
    return suffixes
def strip_subdomains(uri: str) -> str:
    """Reduce a host name to its registrable domain, on a best-effort basis.

    The goal is to broaden matching: if LastPass has `shop.foo.com` and
    bitwarden has `foo.com`, those should probably be treated as the same
    site.

    Labels are consumed from the right. The result is the shortest
    right-hand portion of the host that is not itself a known public
    suffix; if every portion is a known suffix, the stripped input is
    returned unchanged.

    >>> strip_subdomains("foo.bar.www.n8henrie.com")
    'n8henrie.com'
    >>> strip_subdomains("some.where.uk")
    'where.uk'
    >>> strip_subdomains("some.where.co.uk")
    'where.co.uk'
    """
    cleaned = uri.strip()
    known_suffixes = get_tlds()

    candidate = ""
    for label in reversed(cleaned.split(".")):
        # Prepend the next label; the rstrip drops the dangling dot that
        # appears while `candidate` is still empty.
        candidate = f"{label}.{candidate}".rstrip(".")
        if candidate not in known_suffixes:
            return candidate

    # Ran out of labels: the whole host is a public suffix.
    return cleaned
# Handy for eyeballing the fields read below on a single vault item:
#   bw list items |
#     jq '.[10].login.uris[0].uri, .[10].login.username, .[10].login.password'
def get_bw_items(
    strip_subs: bool = False,
) -> dict[str, list[tuple[str, str]]]:
    """Return a mapping of bitwarden logins, read via the `bw` CLI.

    Format: { domain: [(username, password)] }

    A list of tuples per domain (rather than a nested {login: password}
    dict) ensures that duplicate entries for one domain don't overwrite
    each other.

    Args:
        strip_subs: If true, pass every domain through `strip_subdomains`
            before using it as a key.

    Returns:
        A dict keyed by the network location of each URI attached to a
        login item. Items without a login or without URIs are skipped;
        missing usernames, passwords and URIs are represented as "".

    Raises:
        subprocess.CalledProcessError: If `bw list items` exits non-zero.
    """
    cmd = subprocess.run(
        ["bw", "list", "items", "--nointeraction"], capture_output=True
    )
    if cmd.returncode != 0:
        # Diagnostics go to stderr so stdout stays clean for the CSV/JSON
        # report the script prints.
        print(cmd.stderr.decode(errors="replace"), file=sys.stderr)
    cmd.check_returncode()

    bw_items = defaultdict(list)
    for item in json.loads(cmd.stdout):
        login = item.get("login")
        if not login:
            continue
        # Replace `None`s with ""
        credentials = (
            login.get("username") or "",
            login.get("password") or "",
        )
        for uri_entry in login.get("uris") or ():
            # The "uri" key may be null or absent; treat both as empty.
            domain = urlparse(uri_entry.get("uri") or "").netloc
            if strip_subs:
                domain = strip_subdomains(domain)
            bw_items[domain].append(credentials)
    return dict(bw_items)
def get_lp_items(
    filehandle, strip_subs: bool = False
) -> list[tuple[str, str, str]]:
    """Return list of lastpass logins read from a LastPass CSV export.

    Format: [(uri, username, password)]

    Args:
        filehandle: Open text file positioned at the start of the export;
            its header row must contain `url`, `username` and `password`
            columns. The handle is always closed before returning, even
            when parsing fails.
        strip_subs: If true, pass every domain through `strip_subdomains`.

    Returns:
        One tuple per CSV row, where `uri` is the network location of the
        row's URL. Fields missing from a short row are returned as "".

    Raises:
        KeyError: If one of the required columns is missing from the header.
    """
    lp_items = []
    try:
        for row in csv.DictReader(filehandle):
            # DictReader fills fields missing from short rows with None;
            # normalise those to "" so every tuple holds only strings.
            uri = urlparse(row["url"] or "").netloc
            if strip_subs:
                uri = strip_subdomains(uri)
            lp_items.append(
                (uri, row["username"] or "", row["password"] or "")
            )
    finally:
        filehandle.close()
    return lp_items
def _compare( | |
lp_items, | |
bw_items, | |
passwords_anywhere: bool = False, | |
) -> dict[str, dict[str, dict[str, str]]]: | |
matches = dict() | |
if passwords_anywhere: | |
lp_pws: set[str] = {pw for pw in lp_items} | |
for _, _, lp_pw in lp_pws: | |
for uri, login in bw_items.items(): | |
for bw_user, bw_pw in login: | |
if bw_pw == lp_pw and lp_pw.strip(): | |
matches[uri] = { | |
"password": lp_pw, | |
"lp_user": None, | |
"bw_user": None, | |
} | |
return matches | |
for (uri, lp_user, lp_pw) in lp_items: | |
if not (bw_logins := bw_items.get(uri)): | |
continue | |
for (bw_user, bw_pw) in bw_logins: | |
if lp_pw == bw_pw: | |
matches[uri] = { | |
"password": lp_pw, | |
"lp_user": lp_user, | |
"bw_user": bw_user, | |
} | |
return matches | |
def _output(
    matches,
    fmt: str,
) -> str:
    """Render the matches as text in the requested format.

    Args:
        matches: Mapping of `uri -> {"password", "lp_user", "bw_user"}`.
        fmt: Either "json" or "csv".

    Returns:
        For "json", an indented JSON document with sorted keys. For "csv",
        a header row followed by one row per match, separated by
        `os.linesep` and without a trailing line terminator.

    Raises:
        ValueError: If `fmt` is not a supported format.
    """
    if fmt == "json":
        return json.dumps(matches, sort_keys=True, indent=4)

    if fmt == "csv":
        buffer = io.StringIO()
        writer = csv.writer(buffer, lineterminator=os.linesep)
        writer.writerow(["uri", "lp_user", "bw_user", "password"])
        writer.writerows(
            [uri, found["lp_user"], found["bw_user"], found["password"]]
            for uri, found in matches.items()
        )
        # Drop only the final line terminator: a blanket strip() would also
        # eat trailing whitespace that belongs to the last row's password.
        return buffer.getvalue().removesuffix(os.linesep)

    raise ValueError(f"Unknown output format: {fmt}")
def main():
    """Run the script: load both vaults, compare them, print the report.

    Refuses to do any work unless `--yes-i-read-the-script` was given; in
    that case the usage text is shown and the process exits.
    """
    args = _cli()
    if not args.yes_i_read_the_script:
        _cli(show_usage=True)
        sys.exit(1)

    strip_subs = args.strip_subdomains
    lp_items = get_lp_items(args.lastpass_csv, strip_subs=strip_subs)
    bw_items = get_bw_items(strip_subs=strip_subs)

    matches = _compare(
        lp_items=lp_items,
        bw_items=bw_items,
        passwords_anywhere=args.passwords_anywhere,
    )

    # The shortcut flags take precedence over --output-format.
    if args.csv:
        fmt = "csv"
    elif args.json:
        fmt = "json"
    else:
        fmt = args.output_format

    print(_output(matches, fmt=fmt))
# Only run when executed as a script, not when imported as a module.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment