Created
July 28, 2020 11:15
-
-
Save RamonWill/5572bb49916c87d2f3227bf6ff26f5a3 to your computer and use it in GitHub Desktop.
This is from a video tutorial on how to create a web scraper for morrisons.com using OOP concepts. It also shows how to store the scraped data in a CSV file and a database.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlite3
from contextlib import closing

import pandas as pd
# NOTE: `requests` is aliased to `re` (the usual name of the stdlib regex
# module); the alias is kept because the rest of this file refers to it.
import requests as re
from bs4 import BeautifulSoup
# This code is from my YouTube video: https://www.youtube.com/watch?v=ii7CfpdRPYA
def main():
    """Scrape the Morrisons "fresh" category and persist the products.

    The products found on the page are inserted into the ``products`` table
    of a local SQLite database (the table is created on first use) and are
    then also written to ``morrisons.csv``.
    """
    url = "https://groceries.morrisons.com/browse/fresh-176739"
    page_products = MorrisonsWebpage(url).get_products()

    # NOTE: the file name's spelling is kept as-is so that databases created
    # by earlier runs keep being used.
    with closing(sqlite3.connect("MorrionsProduct.db")) as conn:
        # `with conn` commits when the body succeeds and rolls back when it
        # raises; `closing` guarantees the handles are released either way.
        with conn, closing(conn.cursor()) as cursor:
            cursor.execute(
                """CREATE TABLE IF NOT EXISTS products(
                    id INTEGER PRIMARY KEY,
                    name VARCHAR(250) NOT NULL,
                    url VARCHAR(250) NOT NULL,
                    price REAL,
                    rating REAL)"""
            )
            for product in page_products:
                print(f"Inserting {product}...")
                # The named placeholders are filled from the product's
                # attribute dict (name, url, price, rating); `null` lets
                # SQLite assign the integer primary key.
                cursor.execute(
                    """INSERT INTO products VALUES(
                        null, :name, :url, :price, :rating)""",
                    vars(product),
                )

    df = pd.DataFrame([vars(product) for product in page_products])
    df.to_csv("morrisons.csv")
class MorrisonsProduct:
    """A single product scraped from a Morrisons listing page.

    Two products are considered the same item when both their ``name`` and
    ``url`` match; ``price`` and ``rating`` do not take part in equality or
    hashing, so instances de-duplicate by identity of the listing when put
    in a set.

    Attributes are stored in the instance ``__dict__`` so that
    ``vars(product)`` yields a ``name``/``url``/``price``/``rating`` mapping.
    """

    def __init__(self, name, url, price, rating):
        self.name = name
        self.url = url
        self.price = price
        self.rating = rating

    def __repr__(self):
        return f"Product: {self.name}"

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) lets Python try the
        # reflected comparison before falling back to "not equal", which
        # keeps equality symmetric for subclasses and foreign types.
        if not isinstance(other, MorrisonsProduct):
            return NotImplemented
        return (self.name, self.url) == (other.name, other.url)

    def __hash__(self):
        # Must stay in step with __eq__: same key tuple.
        return hash((self.name, self.url))
class MorrisonsWebpage:
    """Scrape the products listed on one groceries.morrisons.com page.

    Constructing an instance validates the URL, downloads the page and turns
    every ``<div class="fop-contentWrapper">`` element into a
    ``MorrisonsProduct``.  The results are collected, de-duplicated, in the
    ``products`` set.

    Raises:
        AttributeError: if ``url`` does not contain
            ``https://groceries.morrisons.com/``.
    """

    # Prefix prepended to the relative product links found in the page.
    _SITE_ROOT = "https://groceries.morrisons.com"
    # Seconds to wait for the server; without a timeout the request could
    # block indefinitely.
    _REQUEST_TIMEOUT = 30

    def __init__(self, url):
        if "https://groceries.morrisons.com/" not in url:
            msg = "url must contain https://groceries.morrisons.com/"
            raise AttributeError(msg)

        self._url = url
        self._page_element = None
        self.products = set()

        self._create_soup_element()
        self._extract_products()

    def get_products(self):
        """Return the scraped products as a new list (order is arbitrary)."""
        return list(self.products)

    def _create_soup_element(self):
        """Download the page and store its product elements."""
        response = re.get(self._url, timeout=self._REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.text, "html.parser")
        self._page_element = soup.find_all("div", class_="fop-contentWrapper")

    def _create_product(self, product):
        """Build a ``MorrisonsProduct`` from one product element."""
        title = product.h4["title"]
        link = self._SITE_ROOT + product.a["href"]
        price = self._parse_price(product)
        rating = self._parse_rating(product)
        return MorrisonsProduct(title, link, price, rating)

    def _parse_price(self, product):
        """Return the product's price in pounds, or ``None`` if unavailable.

        The offer price (``fop-price price-offer``) takes precedence over
        the normal price (``fop-price``).  Prices ending in ``p`` are read
        as pence and converted to pounds; otherwise a single leading
        non-digit character (the currency symbol) is dropped.
        """
        price_tag = product.find("span", class_="fop-price price-offer")
        if price_tag is None:
            price_tag = product.find("span", class_="fop-price")
        if price_tag is None:
            return None

        text = price_tag.string
        # `.string` is None when the span does not hold exactly one string.
        if text is None:
            return None
        text = text.strip()
        if not text:
            return None

        if text.endswith("p"):
            return float(text[:-1]) / 100
        if not text[0].isdigit():
            text = text[1:]
        return float(text)

    def _parse_rating(self, product):
        """Return the product's rating as a float, or ``None`` if unrated.

        The value is read from characters 8-12 of the ``title`` attribute of
        the ``fop-rating-inner`` span.
        """
        rating_tag = product.find("span", class_="fop-rating-inner")
        if rating_tag is None:
            return None
        # NOTE(review): assumes the numeric score always sits at this fixed
        # offset in the title text - confirm against the live markup.
        return float(rating_tag["title"][8:13])

    def _extract_products(self):
        """Convert every stored page element into a product in ``products``."""
        for element in self._page_element:
            self.products.add(self._create_product(element))
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment