Created
December 20, 2021 02:10
-
-
Save eabase/abe66052acf23cbcfd8576a04bc3d6e3 to your computer and use it in GitHub Desktop.
Get additional PEG ratios using yfinance (package) and the Yahoo Finance scraping API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# To resolve issue #903 in yfinance | |
# https://github.com/ranaroussi/yfinance/issues/903 | |
#---------------------------------------------------------- | |
# Date: 2021-12-19 | |
# | |
# The PEG Ratio Yahoo Finance Paths: | |
# | |
# The working curl: | |
# curl -s https://finance.yahoo.com/quote/{FE}/key-statistics?p={FE} | grep "root.App.main = " | sed -e "s/root.App.main = //" |sed 's/.$//'> dafuck_ok.json | |
# curl -s https://finance.yahoo.com/quote/{FE}/key-statistics?p={FE} | grep "root.App.main = " | sed -e "s/root.App.main = //" |sed 's/.$//' |grep -ioE "[a-zA-Z_\"]*pegRatio.{120}" | |
# | |
# For: quarterlyPegRatio,trailingPegRatio | |
# curl -s 'https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/FE?symbol=FE&type={quarterlyPegRatio,trailingPegRatio}&period1=493590046&period2=1913180947' | |
# | |
# Optional curl headers (user_agent & headers): | |
# -A "curl/7.55.1" | |
# -H "x-api-key: xxxxxx" | |
# -H "accept: application/json" | |
# -H "content-type: application/json" | |
# | |
# accept: application/json;charset=utf-8 #text/html | |
# referer: https://finance.yahoo.com/ | |
# cache-control: no-cache | |
# connection: close | |
# | |
# Default requests (sessions) settings: | |
# "User-Agent": "python-requests/2.26.0", | |
# "Accept-Encoding": "gzip, deflate", | |
# "Accept": "*/*", | |
# "Connection": "keep-alive" | |
# | |
# References: | |
# [1] https://github.com/Mtaylert/Stock_Evaluation/blob/dd5873ceb0c8b614b491a246aa3949960816ad92/src_code/StockEvaluations.ipynb | |
# [2] https://stackoverflow.com/questions/44030983/yahoo-finance-url-not-working | |
# [3] https://www.nylas.com/blog/use-python-requests-module-rest-apis/ | |
# [4] https://docs.python-requests.org/en/master/user/advanced/#session-objects | |
#---------------------------------------------------------- | |
import json
import re
import sys
# import pprint

import numpy as np
import requests
# Any truthy value dumps the request/response headers of every call.
debug = 1
showline = '\n' + '-' * 60 + '\n'

# The RegEx to grab only the JSON part of the request
p = re.compile(r'root\.App\.main = (.*);')

# To scan many symbols, build the list elsewhere, e.g.:
#   tickers = list(np.unique(TopShifts_up['ticker']))
tickers = ['FE']

# Maps ticker -> list of single-key dicts ({series name: raw value}),
# terminated by a {'Company': ticker} row.
q_results = {}

# QuoteTimeSeriesStore keys noted by the original author:
#   trailingPsRatio, quarterlyPbRatio, quarterlyForwardPeRatio,
#   quarterlyMarketCap, quarterlyPeRatio, trailingMarketCap,
#   trailingEnterprisesValueEBITDARatio, quarterlyEnterprisesValueEBITDARatio,
#   trailingForwardPeRatio, trailingEnterpriseValue, trailingPeRatio,
#   quarterlyEnterprisesValueRevenueRatio, quarterlyPsRatio,
#   trailingEnterprisesValueRevenueRatio, quarterlyPegRatio, trailingPbRatio,
#   quarterlyEnterpriseValue, trailingPegRatio, timestamp
# NOTE: my_qs_keys is not read by the code below; it is kept for the
# QuoteSummaryStore variant of this lookup.
my_qs_keys = ['pegRatio']                               # QuoteSummaryStore
my_ts_keys = ['trailingPegRatio', 'quarterlyPegRatio']  # QuoteTimeSeriesStore
my_headers = {
    'user-agent': 'curl/7.55.1',
    'accept': 'application/json',
    'content-type': 'application/json',
    'referer': 'https://finance.yahoo.com/',
    'cache-control': 'no-cache',
    'connection': 'close',
}

# Seconds to wait on the HTTP request before giving up; without a timeout
# a stalled connection would block the script indefinitely.
request_timeout = 10


def show_headers(response):
    """Print the request and response headers of *response* as indented JSON."""
    print(showline, ' Request Headers:', showline)
    print(json.dumps(dict(response.request.headers), indent=2))
    print(showline, ' Response Headers:', response.status_code, showline)
    print(json.dumps(dict(response.headers), indent=2))
    print(showline)


def parse_app_state(html):
    """Return the decoded ``root.App.main`` JSON object embedded in *html*.

    Returns None when the page contains no ``root.App.main = ...;`` line.
    Raises ValueError (json.JSONDecodeError) when the captured text is not
    valid JSON.
    """
    found = p.findall(html)
    if not found:
        return None
    return json.loads(found[0])


def extract_series_values(store, keys):
    """Collect the raw reported values for each series name in *keys*.

    *store* is the decoded QuoteTimeSeriesStore mapping.  Every truthy entry
    of ``store['timeSeries'][key]`` contributes one ``{key: raw_value}`` row,
    in order; falsy entries are skipped.  When a series is missing or an
    entry does not have the expected ``reportedValue.raw`` shape, a
    ``{key: nan}`` row is appended and the rest of that series is abandoned.
    """
    rows = []
    for key in keys:
        try:
            for entry in store['timeSeries'][key]:
                if entry:
                    rows.append({key: entry['reportedValue']['raw']})
        except (KeyError, IndexError, TypeError):
            rows.append({key: np.nan})
    return rows


def fetch_ticker(session, ticker):
    """Download the key-statistics page for *ticker* and extract its rows.

    Returns the list of rows (series values followed by a ``{'Company':
    ticker}`` marker), or None when the page holds no JSON block.  Network
    errors (requests.RequestException), invalid JSON (ValueError) and an
    unexpected store layout (KeyError/TypeError) propagate to the caller.
    """
    url = 'https://finance.yahoo.com/quote/{}/key-statistics?p={}'.format(ticker, ticker)
    response = session.get(url, headers=my_headers, timeout=request_timeout)
    if debug:
        show_headers(response)

    state = parse_app_state(response.text)
    if state is None:
        print(showline, ' No JSON block found in Response!', showline)
        return None

    # Switch to ['QuoteSummaryStore'] (with my_qs_keys) for the summary data.
    store = state['context']['dispatcher']['stores']['QuoteTimeSeriesStore']
    rows = extract_series_values(store, my_ts_keys)
    rows.append({'Company': ticker})
    return rows


def main():
    """Fetch every symbol in ``tickers`` into ``q_results`` and print the result."""
    with requests.Session() as s:
        for ticker in tickers:
            # A failure for one ticker is reported and skipped so the
            # remaining tickers are still processed.
            try:
                rows = fetch_ticker(s, ticker)
            except requests.RequestException as err:
                print('Request for {} failed: {}'.format(ticker, err), file=sys.stderr)
                continue
            except (ValueError, KeyError, TypeError) as err:
                print('Unexpected page data for {}: {!r}'.format(ticker, err), file=sys.stderr)
                continue
            if rows is not None:
                q_results.setdefault(ticker, []).extend(rows)

    if q_results:
        print(q_results)


if __name__ == '__main__':
    main()
#---------------------------------------------------------- | |
# EOF | |
#---------------------------------------------------------- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Awesome! Will integrate into yfinance and provide full credit!