# adifahmi/html_checker.py

## html_checker.py
import re
import argparse
import urllib.parse
import urllib.request

# Any `<...>` tag whose first character after `<` is not `!`
# (so `<!-- ... -->` and `<!DOCTYPE ...>` openers are not matched).
# Raw strings are used so the patterns carry no invalid string escapes.
htmlRegex = r'<[^!][^>]*>'
# `<` or `</` that is NOT immediately followed by one of the listed names.
# NOTE(review): not referenced by any function in the visible code.
voidElementsRegex = r'</?(?!area|base|br|col|embed|hr|img|input|keygen|link|menuitem|meta)'
# `<` followed by anything except `/`.
openingTagRegex = r'<[^/]'
# Literal `</`.
closingTagRegex = r'</'

# Command-line interface: the markup to check is passed with -i/--input.
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--input', help='HTML markup to check')


def get_tag_list(html):
    """Return every substring of *html* matched by ``htmlRegex``, in order."""
    tag_pattern = re.compile(htmlRegex, flags=re.I | re.M)
    return tag_pattern.findall(html)


def get_opening_tag_list(tag_list):
    """Return the items of *tag_list* that match ``openingTagRegex`` at their start.

    The relative order of the kept items is preserved.
    """
    return [tag for tag in tag_list if re.match(openingTagRegex, tag)]


def get_closing_tag_list(tag_list):
    """Return the items of *tag_list* that match ``closingTagRegex`` at their start.

    The relative order of the kept items is preserved.
    """
    return [tag for tag in tag_list if re.match(closingTagRegex, tag)]


def clean_html(raw_html):
    """Return *raw_html* with every run of non-word characters removed.

    For example ``'</head>'`` becomes ``'head'``.
    """
    non_word_run = re.compile(r'\W+')
    return non_word_run.sub('', raw_html)


def clean_list(the_list):
    """Replace each item of *the_list* with ``clean_html(item)`` and return it.

    The list is modified in place; the returned object is the same list.
    """
    the_list[:] = [clean_html(item) for item in the_list]
    return the_list


# Simple check that the number of opening tags equals the number of closing tags
def is_match_count(opening_tag_list, closing_tag_list):
    """Return True when both lists have the same length, False otherwise."""
    return len(opening_tag_list) == len(closing_tag_list)


# Checks that every tag is closed by a tag of the same name,
# e.g. `head` must be closed by `head` too.
def is_mismatch(opening_tag_list, closing_tag_list):
    """Return True when the tags pair up, False when they are mismatched.

    NOTE: despite the name, ``False`` is the "mismatch" result; callers test
    ``is_mismatch(...) is False``.

    The opening tag names are compared, in order, with the closing tag names
    in reverse order. Only strictly nested markup such as ``<a><b></b></a>``
    therefore passes; sibling elements are reported as mismatched.

    Args:
        opening_tag_list: opening tags, e.g. ``['<div class="x">', '<p>']``.
        closing_tag_list: closing tags, e.g. ``['</p>', '</div>']``.

    Returns:
        bool: True if the names line up, False otherwise.

    Neither argument is modified.
    """
    def tag_name(tag):
        # Keep only the element name so attributes (`<div class="x">`) do not
        # make an opening tag differ from its closing tag.
        found = re.match(r'</?\s*([^\s/>]+)', tag)
        if found:
            return found.group(1)
        # Not tag-shaped (e.g. an already cleaned name): strip non-word chars.
        return re.sub(r'\W+', '', tag)

    opening_names = [tag_name(tag) for tag in opening_tag_list]
    closing_names = [tag_name(tag) for tag in closing_tag_list]
    # Closing tags appear in the reverse order of their opening tags.
    return opening_names == closing_names[::-1]


def is_tag_completed():
    """Check the HTML given with ``-i/--input`` and print the verdict.

    The command line is parsed with the module-level ``parser``. The tags are
    extracted, split into opening and closing tags, and then checked first by
    count and then by name.

    Prints ``TRUE`` on success. On failure prints ``MISMATCHED TAGS COUNT`` or
    ``MISMATCHED TAGS`` followed by ``FALSE``.

    Returns:
        bool: True when both checks pass, False otherwise.

    Exits via ``parser.error`` (usage message, status 2) when no input is given.
    """
    args = parser.parse_args()
    html = args.input
    if html is None:
        # Without this guard re.findall() would fail with a TypeError traceback.
        parser.error("the -i/--input argument is required")

    tag_list = get_tag_list(html)
    opening_tag_list = get_opening_tag_list(tag_list)
    closing_tag_list = get_closing_tag_list(tag_list)

    if not is_match_count(opening_tag_list, closing_tag_list):
        print("MISMATCHED TAGS COUNT")
        print("FALSE")
        return False
    # is_mismatch() returns False when the tags do NOT pair up.
    if not is_mismatch(opening_tag_list, closing_tag_list):
        print("MISMATCHED TAGS")
        print("FALSE")
        return False
    print("TRUE")
    return True


# Run the check only when this file is executed as a script.
if __name__ == "__main__":
    is_tag_completed()
	import re
	import argparse
	import urllib.parse
	import urllib.request

	# Any `<...>` tag whose first character after `<` is not `!`
	# (so `<!-- ... -->` and `<!DOCTYPE ...>` openers are not matched).
	# Raw strings are used so the patterns carry no invalid string escapes.
	htmlRegex = r'<[^!][^>]*>'
	# `<` or `</` that is NOT immediately followed by one of the listed names.
	# The alternation uses plain `|`; an escaped `\|` would match a literal pipe.
	voidElementsRegex = r'</?(?!area|base|br|col|embed|hr|img|input|keygen|link|menuitem|meta)'
	# `<` followed by anything except `/`.
	openingTagRegex = r'<[^/]'
	# Literal `</`.
	closingTagRegex = r'</'

	# Command-line interface: the markup to check is passed with -i/--input.
	parser = argparse.ArgumentParser()
	parser.add_argument('-i', '--input', help='HTML markup to check')


	def get_tag_list(html):
		"""Return every substring of *html* matched by ``htmlRegex``, in order."""
		# Flags are combined with a plain `|`; the escaped `\|` was a syntax error.
		tag_pattern = re.compile(htmlRegex, flags=re.I | re.M)
		return tag_pattern.findall(html)


	def get_opening_tag_list(tag_list):
		"""Return the items of *tag_list* that match ``openingTagRegex`` at their start.

		The relative order of the kept items is preserved.
		"""
		return [tag for tag in tag_list if re.match(openingTagRegex, tag)]


	def get_closing_tag_list(tag_list):
		"""Return the items of *tag_list* that match ``closingTagRegex`` at their start.

		The relative order of the kept items is preserved.
		"""
		return [tag for tag in tag_list if re.match(closingTagRegex, tag)]


	def clean_html(raw_html):
		"""Return *raw_html* with every run of non-word characters removed.

		For example ``'</head>'`` becomes ``'head'``.
		"""
		return re.sub(r'\W+', '', raw_html)


	def clean_list(the_list):
		"""Replace each item of *the_list* with ``clean_html(item)`` and return it.

		The list is modified in place; the returned object is the same list.
		"""
		for idx, val in enumerate(the_list):
			the_list[idx] = clean_html(val)
		return the_list


	# Simple check that the number of opening tags equals the number of closing tags
	def is_match_count(opening_tag_list, closing_tag_list):
		"""Return True when both lists have the same length, False otherwise."""
		return len(opening_tag_list) == len(closing_tag_list)


	# Checks that every tag is closed by a tag of the same name,
	# e.g. `head` must be closed by `head` too.
	def is_mismatch(opening_tag_list, closing_tag_list):
		"""Return True when the tags pair up, False when they are mismatched.

		NOTE: despite the name, ``False`` is the "mismatch" result; callers test
		``is_mismatch(...) is False``.

		The opening tag names are compared, in order, with the closing tag names
		in reverse order. Only strictly nested markup such as ``<a><b></b></a>``
		therefore passes; sibling elements are reported as mismatched.

		Args:
			opening_tag_list: opening tags, e.g. ``['<div class="x">', '<p>']``.
			closing_tag_list: closing tags, e.g. ``['</p>', '</div>']``.

		Returns:
			bool: True if the names line up, False otherwise.

		Neither argument is modified.
		"""
		def tag_name(tag):
			# Keep only the element name so attributes (`<div class="x">`) do not
			# make an opening tag differ from its closing tag.
			found = re.match(r'</?\s*([^\s/>]+)', tag)
			if found:
				return found.group(1)
			# Not tag-shaped (e.g. an already cleaned name): strip non-word chars.
			return re.sub(r'\W+', '', tag)

		opening_names = [tag_name(tag) for tag in opening_tag_list]
		closing_names = [tag_name(tag) for tag in closing_tag_list]
		# Closing tags appear in the reverse order of their opening tags.
		return opening_names == closing_names[::-1]


	def is_tag_completed():
		"""Check the HTML given with ``-i/--input`` and print the verdict.

		The command line is parsed with the module-level ``parser``. The tags are
		extracted, split into opening and closing tags, and then checked first by
		count and then by name.

		Prints ``TRUE`` on success. On failure prints ``MISMATCHED TAGS COUNT`` or
		``MISMATCHED TAGS`` followed by ``FALSE``.

		Returns:
			bool: True when both checks pass, False otherwise.

		Exits via ``parser.error`` (usage message, status 2) when no input is given.
		"""
		args = parser.parse_args()
		html = args.input
		if html is None:
			# Without this guard re.findall() would fail with a TypeError traceback.
			parser.error("the -i/--input argument is required")

		tag_list = get_tag_list(html)
		opening_tag_list = get_opening_tag_list(tag_list)
		closing_tag_list = get_closing_tag_list(tag_list)

		if not is_match_count(opening_tag_list, closing_tag_list):
			print("MISMATCHED TAGS COUNT")
			print("FALSE")
			return False
		# is_mismatch() returns False when the tags do NOT pair up.
		if not is_mismatch(opening_tag_list, closing_tag_list):
			print("MISMATCHED TAGS")
			print("FALSE")
			return False
		print("TRUE")
		return True


	# Run the check only when this file is executed as a script.
	if __name__ == '__main__':
		is_tag_completed()