# L-36 MCS 275 Mon 10 Apr 2017 : tallytags.py
"""
Illustration of HTMLParser to parse an html page.
At the start of each tag, we print the attributes
when the tag is 'a'.  The tally is updated each
time we encounter the end of a tag.
"""

from html.parser import HTMLParser
from urllib.request import urlopen

class TagTally(HTMLParser):
    """
    Parser that prints the attributes of every 'a'
    start tag and counts how often each end tag occurs.
    The counts are kept in the dictionary self.tags,
    with the tag names as keys.
    """
    def __init__(self):
        """
        Sets up the parser with an empty tally.
        """
        super().__init__()
        self.tags = {}

    def __str__(self):
        """
        Returns the tally as text, with one 'tag:count'
        entry per line and no trailing newline.
        An empty tally gives the empty string.
        """
        entries = [str(name) + ':' + str(count)
                   for name, count in self.tags.items()]
        return '\n'.join(entries)

    def handle_starttag(self, tag, attrs):
        """
        Prints the attributes of the tag,
        but only when the tag is 'a'.
        """
        if tag != 'a':
            return
        message = "Attributes of tag : %s " % attrs
        print(message)

    def handle_endtag(self, tag):
        """
        Adds one to the count of the tag; a tag seen
        for the first time starts its count at one.
        """
        seen = self.tags.get(tag, 0)
        self.tags[tag] = seen + 1

def main():
    """
    Opens a web page and parses it.

    The page is read in chunks of 80 bytes and every
    chunk is fed to a TagTally parser.  After the last
    chunk, the tally of the end tags is printed.

    The bytes are decoded incrementally, so a character
    whose encoding is split over two chunks is decoded
    correctly instead of raising a UnicodeDecodeError.
    The connection is closed when reading is done,
    also when an exception occurs.
    """
    # local import: only main needs the incremental decoder
    import codecs

    url = 'http://www.uic.edu'
    print('opening %s ...' % url)
    tags = TagTally()
    with urlopen(url) as page:
        # use the charset announced by the server, if any;
        # utf-8 is what bytes.decode() assumed before
        charset = page.headers.get_content_charset() or 'utf-8'
        try:
            make_decoder = codecs.getincrementaldecoder(charset)
        except LookupError:  # charset unknown to Python
            make_decoder = codecs.getincrementaldecoder('utf-8')
        # invalid bytes become U+FFFD: one bad byte
        # should not abort the tally of a whole page
        decoder = make_decoder(errors='replace')
        while True:
            chunk = page.read(80)
            if not chunk:
                break
            tags.feed(decoder.decode(chunk))
        # flush what the decoder still holds of a
        # character that was cut off at the very end
        tags.feed(decoder.decode(b'', final=True))
    tags.close()
    print('the tally of tags :')
    print(tags)

# Run main() only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()