# RamonWill/WebscrapeTutorial.py

## WebscrapeTutorial.py
import sqlite3

from bs4 import BeautifulSoup

import requests as re

import pandas as pd

# This code is from my youtube video: https://www.youtube.com/watch?v=ii7CfpdRPYA

def main():
    """Scrape the Morrisons "fresh" listing and persist the products.

    Builds a ``MorrisonsWebpage`` for a fixed browse URL, inserts every
    product it yields into the ``products`` table of the SQLite file
    ``MorrionsProduct.db`` (the table is created when missing) and writes the
    same records to ``morrisons.csv``.
    """
    url = "https://groceries.morrisons.com/browse/fresh-176739"
    page = MorrisonsWebpage(url)
    page_products = page.get_products()

    create_table_sql = """ CREATE TABLE IF NOT EXISTS products(
                    id INTEGER PRIMARY KEY,
                    name VARCHAR(250) NOT NULL,
                    url VARCHAR(250) NOT NULL,
                    price REAL,
                    rating REAL)"""
    # The named placeholders are filled from vars(product), i.e. the
    # product's instance attributes.
    insert_sql = """INSERT INTO products VALUES(
                        null, :name, :url, :price, :rating)"""

    # NOTE(review): the file name looks like a typo of "Morrisons"; it is kept
    # as-is so that an existing database file continues to be used.
    conn = sqlite3.connect("MorrionsProduct.db")
    try:
        c = conn.cursor()
        try:
            c.execute(create_table_sql)
            for product in page_products:
                print(f"Inserting {product}...")
                c.execute(insert_sql, vars(product))
            conn.commit()
        finally:
            # Release the cursor even when an insert fails.
            c.close()
    finally:
        # Always close the connection; without a commit nothing is persisted.
        conn.close()

    df = pd.DataFrame([vars(product) for product in page_products])
    df.to_csv("morrisons.csv")

class MorrisonsProduct:
    """One product taken from a Morrisons listing page.

    Two products compare equal (and hash alike) when both their name and
    their URL match; price and rating play no part in identity.
    """

    def __init__(self, name, url, price, rating):
        # Attributes are created in the order name, url, price, rating, which
        # is also the key order of vars(instance).
        self.name, self.url = name, url
        self.price, self.rating = price, rating

    def _key(self):
        """Return the (name, url) pair that identifies this product."""
        return (self.name, self.url)

    def __repr__(self):
        return f"Product: {self.name}"

    def __eq__(self, other):
        # Anything that is not an instance of this object's class is unequal.
        if not isinstance(other, type(self)):
            return False
        return self._key() == (other.name, other.url)

    def __hash__(self):
        return hash(self._key())


class MorrisonsWebpage(object):
    """Fetch a Morrisons groceries listing page and extract its products.

    The page is downloaded and parsed while the instance is being created;
    the products found are kept in the ``products`` set and returned as a
    list by ``get_products()``.

    Args:
        url: Address of the listing page. It must contain
            ``https://groceries.morrisons.com/``.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            the request could block indefinitely.

    Raises:
        AttributeError: If ``url`` does not contain the Morrisons groceries
            address.
    """

    # Prefix prepended to the href found in each product tile.
    _BASE_URL = "https://groceries.morrisons.com"

    def __init__(self, url, timeout=10):
        if "https://groceries.morrisons.com/" not in url:
            msg = "url must contain https://groceries.morrisons.com/"
            raise AttributeError(msg)

        self._url = url
        self._timeout = timeout
        self._page_element = None
        self.products = set()

        self._create_soup_element()
        self._extract_products()

    def get_products(self):
        """Return the extracted products as a new list."""
        return list(self.products)

    def _create_soup_element(self):
        """Download the page and store its product tiles in ``_page_element``."""
        # ``re`` is this file's alias for the ``requests`` package, not the
        # standard-library regular-expression module.
        page = re.get(self._url, timeout=self._timeout)
        soup = BeautifulSoup(page.text, "html.parser")
        self._page_element = soup.find_all("div", class_="fop-contentWrapper")

    def _create_product(self, product):
        """Build a ``MorrisonsProduct`` from a single product tile element."""
        title = product.h4["title"]
        link = self._BASE_URL + product.a["href"]
        price = self._parse_price(product)
        rating = self._parse_rating(product)
        return MorrisonsProduct(title, link, price, rating)

    def _parse_price(self, product):
        """Return the tile's price in pounds, or ``None`` if it shows none.

        A span with the offer-price class takes precedence over the regular
        price span.
        """
        node = product.find("span", class_="fop-price price-offer")
        if node is None:
            node = product.find("span", class_="fop-price")
        if node is None:
            return None

        text = node.string
        if text is None:
            # ``.string`` is None when the span does not hold exactly one
            # string; fall back to all of its text.
            text = node.get_text()
        return self._price_from_text(text)

    @staticmethod
    def _price_from_text(text):
        """Convert price text such as ``"£1.50"`` or ``"85p"`` to pounds.

        Returns ``None`` for blank text. Raises ``ValueError`` when what is
        left after removing the currency marker is not a number.
        """
        text = text.strip()
        if not text:
            return None
        if "p" in text:
            # Pence amount: drop the marker and convert to pounds.
            return float(text.replace("p", "")) / 100
        if text[0] not in "0123456789.":
            # Drop a single leading currency symbol, but never a digit.
            text = text[1:]
        return float(text)

    def _parse_rating(self, product):
        """Return the tile's rating as a float, or ``None`` if it has none.

        The number is read from the rating span's ``title`` attribute,
        starting at offset 8.

        Raises:
            ValueError: If no number can be read at that position.
        """
        rating = product.find("span", class_="fop-rating-inner")
        if rating is None:
            return None

        # NOTE(review): offset 8 presumably skips a label such as "Rating: "
        # -- confirm against the live markup.
        remainder = rating["title"][8:]
        try:
            return float(remainder[:5])
        except ValueError:
            # The fixed five-character window may cut into the text after a
            # shorter number (e.g. "4.5 o"); retry with the first token.
            tokens = remainder.split()
            if not tokens:
                raise
            return float(tokens[0])

    def _extract_products(self):
        """Turn every stored tile into a product and add it to ``products``."""
        for element in self._page_element:
            self.products.add(self._create_product(element))

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
	import sqlite3

	from bs4 import BeautifulSoup

	import requests as re

	import pandas as pd

	# This code is from my youtube video: https://www.youtube.com/watch?v=ii7CfpdRPYA

	def main():
		"""Scrape the Morrisons "fresh" listing and persist the products.

		Builds a ``MorrisonsWebpage`` for a fixed browse URL, inserts every
		product it yields into the ``products`` table of the SQLite file
		``MorrionsProduct.db`` (the table is created when missing) and writes
		the same records to ``morrisons.csv``.
		"""
		url = "https://groceries.morrisons.com/browse/fresh-176739"
		page = MorrisonsWebpage(url)
		page_products = page.get_products()

		create_table_sql = """ CREATE TABLE IF NOT EXISTS products(
			id INTEGER PRIMARY KEY,
			name VARCHAR(250) NOT NULL,
			url VARCHAR(250) NOT NULL,
			price REAL,
			rating REAL)"""
		# The named placeholders are filled from vars(product), i.e. the
		# product's instance attributes.
		insert_sql = """INSERT INTO products VALUES(
			null, :name, :url, :price, :rating)"""

		# NOTE(review): the file name looks like a typo of "Morrisons"; it is
		# kept as-is so that an existing database file continues to be used.
		conn = sqlite3.connect("MorrionsProduct.db")
		try:
			c = conn.cursor()
			try:
				c.execute(create_table_sql)
				for product in page_products:
					print(f"Inserting {product}...")
					c.execute(insert_sql, vars(product))
				conn.commit()
			finally:
				# Release the cursor even when an insert fails.
				c.close()
		finally:
			# Always close the connection; without a commit nothing is persisted.
			conn.close()

		df = pd.DataFrame([vars(product) for product in page_products])
		df.to_csv("morrisons.csv")

	class MorrisonsProduct(object):
		"""One product taken from a Morrisons listing page.

		Two products compare equal (and hash alike) when both their name and
		their URL match; price and rating play no part in identity.
		"""

		def __init__(self, name, url, price, rating):
			self.name = name
			self.url = url
			self.price = price
			self.rating = rating

		def __repr__(self):
			return f"Product: {self.name}"

		def __eq__(self, other):
			# Anything that is not an instance of this object's class is unequal.
			return (isinstance(other, type(self))
					and (self.name, self.url) ==
					(other.name, other.url))

		def __hash__(self):
			# Hash on the same fields that __eq__ compares.
			return hash((self.name, self.url))


	class MorrisonsWebpage(object):
		"""Fetch a Morrisons groceries listing page and extract its products.

		The page is downloaded and parsed while the instance is being created;
		the products found are kept in the ``products`` set and returned as a
		list by ``get_products()``.

		Args:
			url: Address of the listing page. It must contain
				``https://groceries.morrisons.com/``.
			timeout: Seconds to wait for the HTTP response. Without a timeout
				the request could block indefinitely.

		Raises:
			AttributeError: If ``url`` does not contain the Morrisons
				groceries address.
		"""

		# Prefix prepended to the href found in each product tile.
		_BASE_URL = "https://groceries.morrisons.com"

		def __init__(self, url, timeout=10):
			if "https://groceries.morrisons.com/" not in url:
				msg = "url must contain https://groceries.morrisons.com/"
				raise AttributeError(msg)

			self._url = url
			self._timeout = timeout
			self._page_element = None
			self.products = set()

			self._create_soup_element()
			self._extract_products()

		def get_products(self):
			"""Return the extracted products as a new list."""
			return list(self.products)

		def _create_soup_element(self):
			"""Download the page and store its product tiles in ``_page_element``."""
			# ``re`` is this file's alias for the ``requests`` package, not the
			# standard-library regular-expression module.
			page = re.get(self._url, timeout=self._timeout)
			soup = BeautifulSoup(page.text, "html.parser")
			self._page_element = soup.find_all("div", class_="fop-contentWrapper")

		def _create_product(self, product):
			"""Build a ``MorrisonsProduct`` from a single product tile element."""
			title = product.h4["title"]
			link = self._BASE_URL + product.a["href"]
			price = self._parse_price(product)
			rating = self._parse_rating(product)
			return MorrisonsProduct(title, link, price, rating)

		def _parse_price(self, product):
			"""Return the tile's price in pounds, or ``None`` if it shows none.

			A span with the offer-price class takes precedence over the
			regular price span.
			"""
			node = product.find("span", class_="fop-price price-offer")
			if node is None:
				node = product.find("span", class_="fop-price")
			if node is None:
				return None

			text = node.string
			if text is None:
				# ``.string`` is None when the span does not hold exactly one
				# string; fall back to all of its text.
				text = node.get_text()
			return self._price_from_text(text)

		@staticmethod
		def _price_from_text(text):
			"""Convert price text such as ``"£1.50"`` or ``"85p"`` to pounds.

			Returns ``None`` for blank text. Raises ``ValueError`` when what
			is left after removing the currency marker is not a number.
			"""
			text = text.strip()
			if not text:
				return None
			if "p" in text:
				# Pence amount: drop the marker and convert to pounds.
				return float(text.replace("p", "")) / 100
			if text[0] not in "0123456789.":
				# Drop a single leading currency symbol, but never a digit.
				text = text[1:]
			return float(text)

		def _parse_rating(self, product):
			"""Return the tile's rating as a float, or ``None`` if it has none.

			The number is read from the rating span's ``title`` attribute,
			starting at offset 8.

			Raises:
				ValueError: If no number can be read at that position.
			"""
			rating = product.find("span", class_="fop-rating-inner")
			if rating is None:
				return None

			# NOTE(review): offset 8 presumably skips a label such as
			# "Rating: " -- confirm against the live markup.
			remainder = rating["title"][8:]
			try:
				return float(remainder[:5])
			except ValueError:
				# The fixed five-character window may cut into the text after
				# a shorter number (e.g. "4.5 o"); retry with the first token.
				tokens = remainder.split()
				if not tokens:
					raise
				return float(tokens[0])

		def _extract_products(self):
			"""Turn every stored tile into a product and add it to ``products``."""
			for element in self._page_element:
				self.products.add(self._create_product(element))

	# Run the scraper only when this file is executed as a script, not on import.
	if __name__ == "__main__":
		main()