Created
November 19, 2021 16:51
-
-
Save mohammedouahman/0006711b91f087e79ae0a46c98cf0846 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Page count for the scrape. NOTE(review): not referenced in the visible code;
# presumably the number of bestseller pages to pass to get_data() - confirm
# against the caller.
no_pages = 2
def _text_or(tag, default):
    """Return ``tag.text`` when *tag* was found, otherwise *default*."""
    return tag.text if tag is not None else default


def get_data(pageNo):
    """Scrape one page of the Amazon.in books bestseller list.

    Fetches the bestseller page numbered *pageNo* and extracts one record
    per ``div`` with class ``a-section a-spacing-none aok-relative``.

    Args:
        pageNo: Page number; it is stringified and inserted into the URL.

    Returns:
        A list of 5-element lists, one per matched ``div``, in page order::

            [name, author, rating, users_rated, price]

        Each field is the text (or, for ``name``, the image ``alt``
        attribute) of the matching element, or a placeholder when the
        element is missing: ``"unknown-product"`` for the name, ``'-1'``
        for the rating and ``'0'`` for author, users_rated and price.

    Raises:
        requests.exceptions.RequestException: if the HTTP request fails or
            does not answer within the timeout.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
        "Accept-Encoding": "gzip, deflate",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "DNT": "1",
        "Connection": "close",
        "Upgrade-Insecure-Requests": "1",
    }
    url = (
        'https://www.amazon.in/gp/bestsellers/books/ref=zg_bs_pg_'
        + str(pageNo) + '?ie=UTF8&pg=' + str(pageNo)
    )
    # Without a timeout requests may block forever on an unresponsive server.
    r = requests.get(url, headers=headers, timeout=30)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(r.content, "html.parser")

    alls = []
    for d in soup.find_all('div', attrs={'class': 'a-section a-spacing-none aok-relative'}):
        # The product name is the alt text of the first image inside the
        # title span. Look for images only when the span exists, and fall
        # back to the placeholder when either the span or the image is absent.
        name = d.find('span', attrs={'class': 'zg-text-center-align'})
        images = name.find_all('img', alt=True) if name is not None else []
        title = images[0]['alt'] if images else "unknown-product"

        # The author is rendered either as a link or, failing that, as a
        # plain span.
        author = d.find('a', attrs={'class': 'a-size-small a-link-child'})
        if author is None:
            author = d.find('span', attrs={'class': 'a-size-small a-color-base'})

        rating = d.find('span', attrs={'class': 'a-icon-alt'})
        users_rated = d.find('a', attrs={'class': 'a-size-small a-link-normal'})
        price = d.find('span', attrs={'class': 'p13n-sc-price'})

        alls.append([
            title,
            _text_or(author, '0'),
            _text_or(rating, '-1'),
            _text_or(users_rated, '0'),
            _text_or(price, '0'),
        ])
    return alls
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment