Created
October 3, 2021 09:07
-
-
Save mobeigi/3dd0f78bcc3c9f1686a8815ecdc3d3e2 to your computer and use it in GitHub Desktop.
Reddit Symbol Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter

import praw
import regex as re
# A ticker candidate is a run of 1-5 capital letters delimited by whitespace
# or the string boundaries (e.g. "AAPL", "GME").  The lookarounds assert the
# delimiters without consuming them, so neighbouring tickers separated by a
# single space are all found without needing an overlapped search.
TICKER_PATTERN = re.compile(r'(?<!\S)[A-Z]{1,5}(?!\S)')

# All-caps words that match the ticker pattern but are not ticker symbols.
FILTER_LIST = frozenset({'I', 'ETF', 'A', 'IRA', 'US'})


def count_symbols(bodies, ignored=FILTER_LIST):
    """Count ticker-like symbols across an iterable of comment bodies.

    Args:
        bodies: Iterable of strings (comment texts) to scan.
        ignored: Container of symbols to exclude from the tally.

    Returns:
        A dict mapping each symbol to its number of occurrences, ordered
        from most to least frequent.  Symbols with equal counts keep the
        order in which they were first seen.
    """
    counts = Counter()
    for body in bodies:
        counts.update(
            symbol
            for symbol in TICKER_PATTERN.findall(body)
            if symbol not in ignored
        )
    # most_common() is a stable descending sort by count; wrapping it in a
    # dict keeps that order and the printed output format.
    return dict(counts.most_common())


def main():
    """Fetch the comments of one Reddit submission and print symbol counts."""
    reddit = praw.Reddit(
        client_id="client_id",
        client_secret="client_secret",
        user_agent="MobeigiTest",
    )
    submission = reddit.submission(id="pfpyxc")
    # limit=0 drops every "load more comments" placeholder instead of
    # fetching it, so only the already-loaded comments are scanned.
    submission.comments.replace_more(limit=0)
    # comments.list() flattens the whole comment forest, so replies are
    # included here, not only top-level comments.
    sorted_symbols = count_symbols(
        comment.body for comment in submission.comments.list()
    )
    print(sorted_symbols)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment