Skip to content

Instantly share code, notes, and snippets.

@eabase
Created December 20, 2021 02:10
Show Gist options
  • Save eabase/abe66052acf23cbcfd8576a04bc3d6e3 to your computer and use it in GitHub Desktop.
Save eabase/abe66052acf23cbcfd8576a04bc3d6e3 to your computer and use it in GitHub Desktop.
Get additional PEG ratios using yfinance (package) and the Yahoo scraping API
#
# To resolve issue #903 in yfinance
# https://github.com/ranaroussi/yfinance/issues/903
#----------------------------------------------------------
# Date: 2021-12-19
#
# The PEG Ratio Yahoo Finance Paths:
#
# The working curl:
# curl -s https://finance.yahoo.com/quote/{FE}/key-statistics?p={FE} | grep "root.App.main = " | sed -e "s/root.App.main = //" |sed 's/.$//'> dafuck_ok.json
# curl -s https://finance.yahoo.com/quote/{FE}/key-statistics?p={FE} | grep "root.App.main = " | sed -e "s/root.App.main = //" |sed 's/.$//' |grep -ioE "[a-zA-Z_\"]*pegRatio.{120}"
#
# For: quarterlyPegRatio,trailingPegRatio
# curl -s 'https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/FE?symbol=FE&type={quarterlyPegRatio,trailingPegRatio}&period1=493590046&period2=1913180947'
#
# Optional curl headers (user_agent & headers):
# -A "curl/7.55.1"
# -H "x-api-key: xxxxxx"
# -H "accept: application/json"
# -H "content-type: application/json"
#
# accept: application/json;charset=utf-8 #text/html
# referer: https://finance.yahoo.com/
# cache-control: no-cache
# connection: close
#
# Default requests (sessions) settings:
# "User-Agent": "python-requests/2.26.0",
# "Accept-Encoding": "gzip, deflate",
# "Accept": "*/*",
# "Connection": "keep-alive"
#
# References:
# [1] https://github.com/Mtaylert/Stock_Evaluation/blob/dd5873ceb0c8b614b491a246aa3949960816ad92/src_code/StockEvaluations.ipynb
# [2] https://stackoverflow.com/questions/44030983/yahoo-finance-url-not-working
# [3] https://www.nylas.com/blog/use-python-requests-module-rest-apis/
# [4] https://docs.python-requests.org/en/master/user/advanced/#session-objects
#----------------------------------------------------------
import requests, re, json #, pprint
import numpy as np
# Diagnostics switch: a truthy value makes the script dump the request and
# response headers for every ticker.
debug = 1

# Horizontal rule (60 dashes framed by newlines) used around diagnostic output.
showline = '\n{}\n'.format('-' * 60)

# The RegEx that grabs only the JSON part of the page: everything assigned to
# `root.App.main`, up to the last ';' on that line.
p = re.compile(r'root\.App\.main = (.*);')

# Symbols to look up.
# Alternative source: tickers = list(np.unique(TopShifts_up['ticker']))
tickers = ['FE']

# Collected output: one list of single-entry dicts per ticker.
q_results = {}

# Keys the original author lists for QuoteTimeSeriesStore:
#   trailingPsRatio, quarterlyPbRatio, quarterlyForwardPeRatio,
#   quarterlyMarketCap, quarterlyPeRatio, trailingMarketCap,
#   trailingEnterprisesValueEBITDARatio, quarterlyEnterprisesValueEBITDARatio,
#   trailingForwardPeRatio, trailingEnterpriseValue, trailingPeRatio,
#   quarterlyEnterprisesValueRevenueRatio, quarterlyPsRatio,
#   trailingEnterprisesValueRevenueRatio, quarterlyPegRatio, trailingPbRatio,
#   quarterlyEnterpriseValue, trailingPegRatio, timestamp
my_qs_keys = ['pegRatio']  # QuoteSummaryStore
my_ts_keys = [  # QuoteTimeSeriesStore
    'trailingPegRatio',
    'quarterlyPegRatio',
]

# HTTP headers sent with every request (overrides the python-requests defaults).
my_headers = {
    'user-agent': 'curl/7.55.1',
    'accept': 'application/json',
    'content-type': 'application/json',
    'referer': 'https://finance.yahoo.com/',
    'cache-control': 'no-cache',
    'connection': 'close',
}
# Fetch the key-statistics page of every ticker, pull the JSON blob embedded
# in the page source and collect the requested time-series values.
#
# For each ticker that yields a parsable blob, q_results[ticker] receives:
#   * one {key: raw_value} dict per non-empty entry of each key in my_ts_keys,
#   * {key: nan} when a key (or one of its fields) is missing,
#   * a trailing {'Company': ticker} marker.
# A ticker whose request or parsing fails is reported and skipped; it does not
# stop the remaining tickers.
with requests.Session() as s:
    for ticker in tickers:
        try:
            # A timeout keeps a stalled connection from hanging the script.
            # Pass `proxies=...` here as well if a proxy is required.
            r = s.get(
                'https://finance.yahoo.com/quote/{}/key-statistics?p={}'.format(ticker, ticker),
                headers=my_headers,
                timeout=30,
            )

            if debug:
                print(showline,' Request Headers:', showline)
                print(json.dumps(dict(r.request.headers), indent=2))
                #------------------------------------------------------------
                print(showline,' Response Headers:', r.status_code, showline)
                print(json.dumps(dict(r.headers), indent=2))
                print(showline)

            regex_data = p.findall(r.text)
            if not regex_data:
                print(showline,' No JSON block found in Response!', showline)
                # Move on to the next ticker instead of abandoning the rest.
                continue

            # Reuse the match found above rather than scanning the page twice.
            data = json.loads(regex_data[0])
            # Use 'QuoteSummaryStore' here instead to read the my_qs_keys values.
            key_stats = data['context']['dispatcher']['stores']['QuoteTimeSeriesStore']
            q_results.setdefault(ticker, [])

            for i in my_ts_keys:
                try:
                    # A key may hold several entries (0, 1, 2, ..); empty ones
                    # are skipped.
                    for entry in key_stats['timeSeries'][i]:
                        if entry:
                            q_results[ticker].append({i: entry['reportedValue']['raw']})
                except (KeyError, TypeError):
                    # Key or one of its fields is absent: record a placeholder.
                    q_results[ticker].append({i: np.nan})

            q_results[ticker].append({'Company': ticker})
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            # Network failure, malformed JSON (json.JSONDecodeError is a
            # ValueError) or unexpected blob layout: report it and continue
            # with the next ticker rather than failing silently.
            print(showline, ' Failed to process {}: {!r}'.format(ticker, err), showline)

if q_results:
    print(q_results)
#----------------------------------------------------------
# EOF
#----------------------------------------------------------
@asafravid
Copy link

Awesome! Will integrate into yfinance and provide full credit!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment