#!/usr/bin/env python3

import re
import sys
import argparse
import textwrap
import socket
import requests

def valid_extension(html):
    """Report whether the fetched page describes a known extension.

    Args:
        html: Text of the page returned for the extension lookup.

    Returns:
        False when the text contains the "Extension Not Found" marker,
        True otherwise.
    """
    not_found_marker = "Extension Not Found"
    return not_found_marker not in html


def get_file_type(html):
    """Extract the file type name from the page markup.

    Args:
        html: Text of the page returned for the extension lookup.

    Returns:
        The text captured inside the first
        ``<span itemprop="name">...</span></h2>`` occurrence.

    Raises:
        IndexError: If the markup contains no such span.
    """
    name_pattern = '<span itemprop="name">(.*?)</span></h2>'
    captured_names = re.findall(name_pattern, html)

    # Only the first occurrence is of interest; indexing an empty result
    # raises IndexError, which is how a missing span is reported.
    return captured_names[0]


def get_description(html):
    """Extract the plain-text description from the page markup.

    Args:
        html: Text of the page returned for the extension lookup.

    Returns:
        The text captured inside the first
        ``<span itemprop="description">...</span>`` occurrence, with any
        HTML tags removed and every run of consecutive spaces collapsed
        to a single space.

    Raises:
        IndexError: If the markup contains no description span.
    """
    # The capture is non-greedy, so it stops at the first closing </span>.
    messy_description = re.findall('<span itemprop="description">(.*?)</span>', html)[0]

    # https://tutorialedge.net/python/removing-html-from-string/
    # This removes all of the HTML tags in the description.
    without_tags = re.sub(r'<[^>]+>', '', messy_description)

    # Collapse every run of two or more spaces into one. A single
    # str.replace("  ", " ") pass would leave runs of three or more spaces
    # only partly collapsed (e.g. four spaces would become two).
    return re.sub(r' {2,}', ' ', without_tags)

def internet_connection():
    """Check for network connectivity, exiting the program if there is none.

    Opens a datagram socket and connects it to 8.8.8.8 port 80. The socket
    is always closed again before returning.

    NOTE(review): a datagram connect presumably only verifies that a route
    exists locally rather than proving the remote host answers -- confirm
    if a stronger check is needed.

    Returns:
        True when the connect call succeeds.

    Raises:
        SystemExit: With exit status 1, after printing a message, when the
            socket cannot be created or connected (OSError).
    """
    try:
        # The context manager closes the socket on both success and failure.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 80))
    except OSError:
        print("Internet connection is required!")
        # Non-zero status so calling shells/scripts can detect the failure.
        sys.exit(1)

    return True

def main():
    """Look up a file extension on fileinfo.com and print what it is.

    Parses a single positional command-line argument (a file name or an
    extension with or without the leading dot), downloads the matching
    fileinfo.com page, and prints the file type name followed by its
    description wrapped to 71 columns.

    Raises:
        SystemExit: With status 2 (via argparse) when no extension can be
            derived from the argument; with status 1 when there is no
            internet connection, the request fails, the extension is
            unknown, or the page cannot be parsed.
    """
    parser = argparse.ArgumentParser(description="Get information on over 10,000 file extensions right from the terminal")

    parser.add_argument('extension', action="store", help="File extention")

    args = parser.parse_args()

    # Using the split function lets the user give a file as
    # an argument or a file extension with or without the dot (.)
    # Example: script.py OR .py OR py
    # After the split the result can no longer contain a dot.
    extension = args.extension.split(".")[-1]

    # Input such as "" or "archive." leaves nothing to look up; reject it
    # here instead of requesting a URL with an empty extension.
    if not extension:
        parser.error("no file extension found in {!r}".format(args.extension))

    # Check if the user is connected to the internet
    internet_connection()

    url = "https://fileinfo.com/extension/{}".format(extension)

    try:
        # Without a timeout the request could block indefinitely.
        r = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print("Could not retrieve {}: {}".format(url, exc))
        sys.exit(1)
    html = r.text

    if not valid_extension(html):
        print("Extension Not Found!")
        sys.exit(1)

    try:
        file_type = get_file_type(html)
        description = get_description(html)
    except IndexError:
        # The extractors index the first regex match, so an unexpected page
        # layout shows up as IndexError; report it instead of a traceback.
        print("Could not read the file information from {}".format(url))
        sys.exit(1)

    print("\033[1mFull form: \033[0m" + file_type)
    print("\033[1m\nWhat is a " + extension + " file?\033[0m")
    print(textwrap.fill(description, 71))

# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
