# L-24 MCS 275 Mon 16 Oct 2023 : scanhttplinks.py
"""
Prompts the user for a URL, opens the page, collects
all double-quoted strings which begin with http,
and prints the network location of each of them.
"""
from scanquotes import update_qstrings

def httpfilter(strings):
    """
    Selects the http strings from the list strings.
    A string is kept, in its original position, when it
    has more than four characters and starts with 'http'.
    """
    return [text for text in strings
            if len(text) > 4 and text[:4] == 'http']

def httplinks(url):
    """
    Given the URL for the web page,
    returns the list of all http strings.
    The page is read in chunks of 80 bytes and decoded
    incrementally as UTF-8, so a character split over two
    chunks is still decoded correctly.
    Returns [] if the page could not be opened.
    If reading or decoding fails halfway, the strings
    collected so far are returned.
    """
    import codecs
    from urllib.request import urlopen
    print('opening ' + url + ' ...')
    try:
        page = urlopen(url)
    except Exception:
        # best effort: any failure to open (malformed URL, network
        # error, HTTP error) yields an empty list, but Ctrl-C and
        # SystemExit are no longer swallowed
        print('opening ' + url + ' failed')
        return []
    # the incremental decoder keeps the bytes of an incomplete
    # multi-byte character until the next chunk arrives
    decoder = codecs.getincrementaldecoder('utf-8')()
    (result, buf) = ([], '')
    try:
        while True:
            try:
                chunk = page.read(80)
                # an empty chunk marks the end of the page; final=True
                # then reports a truncated character as an error
                data = decoder.decode(chunk, final=not chunk)
            except Exception:
                # covers both read errors and invalid UTF-8
                print('could not decode data')
                break
            if not chunk:
                break
            if data == '':
                # the chunk held only part of a character
                continue
            # update_qstrings comes from scanquotes; presumably it adds
            # the completed quoted strings to result and keeps the
            # unfinished one in buf -- verify against scanquotes
            (result, buf) = update_qstrings(result, buf, data)
            result = httpfilter(result)
    finally:
        # close the page even if scanning raised an exception
        page.close()
    return result

def show_locations(links):
    """
    Prints, one per line, the network location
    part of every URL in the list links.
    """
    from urllib.parse import urlparse
    for link in links:
        # netloc is the item at index 1 of the parse result
        print(urlparse(link).netloc)

def main():
    """
    Asks the user for the URL of a web page, reports
    how many http links were found on that page, and
    prints the location of each of those links.
    """
    print('listing reachable locations')
    address = input('Give URL : ')
    found = httplinks(address)
    count = len(found)
    print('found %d HTTP links' % count)
    show_locations(found)

# Run the interactive dialogue only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == "__main__":
    main()
