def computeVocabList(path):
    """Build the vocabulary list for the text stored at ``path``.

    The text is loaded with ``readText``, passed through ``cleanup``,
    lower-cased and split on whitespace; the resulting words are handed to
    ``removeDuplicates`` and its result is returned.
    """
    normalized = cleanup(readText(path)).lower()
    tokens = normalized.split()
    return removeDuplicates(tokens)

def readText(path):
    """Return the entire contents of a file as a string.

    ``path`` is appended to the fixed base directory ``Z:/WinData/Desktop/``
    and the resulting file is opened in text mode with the platform default
    encoding.

    Raises whatever ``open`` raises (e.g. ``FileNotFoundError``) when the
    file cannot be opened.
    """
    # The context manager closes the handle even if read() raises, so the
    # file descriptor is never leaked.
    with open('Z:/WinData/Desktop/' + path) as openedFile:
        return openedFile.read()

def removeDuplicates(words):
    """Return the distinct items of ``words`` in ascending sorted order.

    An empty input yields an empty list. The input list is left untouched:
    sorting happens on a copy, so callers keep their original ordering.
    """
    vocab = []
    # After sorting, equal items are adjacent, so comparing against the last
    # kept item is enough to drop every duplicate.
    for word in sorted(words):
        if not vocab or word != vocab[-1]:
            vocab.append(word)
    return vocab

def cleanup(text):
    """Run ``text`` through ``removeNum`` and then ``removePunc``.

    Returns the output of ``removePunc`` applied to the output of
    ``removeNum``.
    """
    return removePunc(removeNum(text))

# Characters treated as punctuation; each one is mapped to a single space.
_PUNCTUATION_CHARS = ".,?!():;'\"&-*%[]$/#"
_PUNCTUATION_TO_SPACE = str.maketrans(
    _PUNCTUATION_CHARS, ' ' * len(_PUNCTUATION_CHARS)
)


def removePunc(text):
    """Return ``text`` with each listed punctuation character replaced by a space.

    The replaced characters are ``. , ? ! ( ) : ; ' " & - * % [ ] $ / #``.
    Every other character is copied through unchanged, so the result has the
    same length as the input.
    """
    # One C-level pass instead of a per-character ``or`` chain with string
    # concatenation.
    return text.translate(_PUNCTUATION_TO_SPACE)

# Only the ASCII digits 0-9 are mapped to a space; other Unicode digits are kept.
_ASCII_DIGIT_TO_SPACE = str.maketrans('0123456789', ' ' * 10)


def removeNum(text):
    """Return ``text`` with every ASCII digit (0-9) replaced by a space.

    All other characters are copied through unchanged, so the result has the
    same length as the input.
    """
    # One C-level pass instead of a per-character ``or`` chain with string
    # concatenation.
    return text.translate(_ASCII_DIGIT_TO_SPACE)


# Numbered e-book titles; this list is passed to printVocabSizes in this file.
ebooks = [
    '1-The Kama Sutra',
    '2-Pride and Prejudice',
    '3-The Adventures of Sherlock Holmes',
    '4-How to Analyze People on Sight',
    '5-Adventures of Huckleberry Finn',
    "6-Alice's Adventures in Wonderland",
    '7-Beowulf',
    "8-Grimm's Fairy Tales",
    '9-Metamorphosis',
    '10-Ulysses',
]

def printVocabSizes(bookList):
    """Placeholder: print a reminder that this function is not implemented.

    ``bookList`` is accepted but currently ignored. Returns ``None``.
    """
    # TODO: replace this stub with the real vocabulary-size report.
    print('implement me...')


# Runs whenever this module is executed or imported: calls printVocabSizes
# with the ebooks list defined above.
printVocabSizes(ebooks)

