# L-24 MCS 507 Mon 16 Oct 2023 : htmlrefs.py
"""
Illustrates the use of HTMLParser to parse an HTML page and
make a list of the links (href targets) this page refers to.
"""
from html.parser import HTMLParser
from urllib.request import urlopen

class HTMLrefs(HTMLParser):
    """
    Makes a list of all html links.

    The value of every href attribute found on an 'a' start tag
    is appended, in document order, to the list self.refs.
    """
    def __init__(self, verbose=True):
        """
        Initializes the list of links.

        If verbose is True (the default), the attribute list of
        every start tag is printed while parsing; pass False
        to collect the links silently.
        """
        super().__init__()
        self.verbose = verbose
        self.refs = []

    def __str__(self):
        """
        Returns the string rep of the links,
        one link per line, without a trailing newline.
        """
        return '\n'.join(self.refs)

    def handle_starttag(self, tag, attrs):
        """
        Looks for tags equal to 'a' and
        stores links for href attributes.

        The argument attrs is a list of (name, value) tuples.
        """
        if self.verbose:
            print(attrs)
        if tag == 'a':
            # An attribute written without a value, as in <a href>,
            # arrives as ('href', None): there is no link to store,
            # and a None in the list would break __str__.
            self.refs.extend(value for (name, value) in attrs
                             if name == 'href' and value is not None)

def main():
    """
    Opens a web page and parses it.

    The page is read in chunks and fed incrementally to the parser;
    the links found are printed, one per line.
    """
    # local import: only main needs the incremental decoder
    import codecs

    url = 'http://www.uic.edu/'
    print('opening %s ...' % url)
    refs = HTMLrefs()
    # the with statement closes the connection, also on errors
    with urlopen(url) as page:
        # use the charset announced by the server, if any
        charset = page.headers.get_content_charset() or 'utf-8'
        try:
            make_decoder = codecs.getincrementaldecoder(charset)
        except LookupError:  # charset unknown to Python
            make_decoder = codecs.getincrementaldecoder('utf-8')
        # An incremental decoder buffers a multi-byte character that
        # is split over two chunks; decoding each chunk on its own
        # would raise UnicodeDecodeError at such a boundary.
        decoder = make_decoder(errors='replace')
        while True:
            chunk = page.read(4096)
            if not chunk:
                break
            refs.feed(decoder.decode(chunk))
        # flush what is left in the decoder's buffer
        refs.feed(decoder.decode(b'', final=True))
    refs.close()
    print('all html links :')
    print(refs)

# Run main() only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()