"""
This program creates a vocabulary list from a given text file and stores it in
a variable "vocab".

Starter code written collaboratively by the CSCI0931 class, 2013-03-07.
"""

def vocabularySize(filename):
    '''Counts the distinct words used in a text file.

    Takes a filename (for a text file stored on the desktop) as an argument.
    Returns the length of the vocabulary list that vocabulary() builds for
    that file, i.e. the number of unique words found.'''
    uniqueWords = vocabulary(filename)
    return len(uniqueWords)


def vocabulary(filename):
    '''Builds the vocabulary list for a text file.

    Takes a filename (for a text file stored on the desktop) as an argument.
    The text returned by readFile() is split on whitespace, and the resulting
    words are reduced to their unique entries with uniqueWordsSlow().
    Returns that list of unique words.'''
    # split() with no arguments breaks the text apart at any run of whitespace.
    words = readFile(filename).split()
    return uniqueWordsSlow(words)


def readFile(filename):
    '''Reads the beginning of a text file stored on the desktop.

    Takes a filename (string) as an argument.  This function assumes that the
    file indicated is stored on the desktop.  Returns a string that contains
    the first 10,000 characters in that file (or the whole file, if it is
    shorter than that).

    Any error raised while opening or reading the file is passed on to the
    caller.'''
    # Change the Desktop path to be the right one for your computer!
    path = 'C:\\Users\\jmiles\\Desktop\\' + filename
    # The "with" statement closes the file even if reading fails part-way.
    with open(path, 'r') as fileHandle:
        # Ask for only the characters we need instead of loading the whole
        # file and throwing most of it away.
        return fileHandle.read(10000)


def uniqueWordsSlow(wordList):
    '''Takes a list of strings as an argument.  Returns a list of the unique
    entries of that list, in order of first appearance.  In other words:
      - No element appears more than once in the returned list.
      - Everything in the returned list comes from the provided word list.
      - Everything in the provided word list appears in the returned list.
    The algorithm is deliberately slow: every word is compared against all of
    the unique words found so far, so N words cost on the order of N^2
    operations.'''
    seenWords = []
    for candidate in wordList:
        if isInList(seenWords, candidate):
            # Already recorded earlier; skip the repeat.
            continue
        seenWords = appendToList(seenWords, candidate)
    return seenWords


def testUniqueWordsSlow():
    '''Test function for uniqueWordsSlow().  Should return True.

    Checks that a repeated word ('me') is kept only once and that the
    remaining words stay in their original order.'''
    expected = ['Call', 'me', 'maybe']
    actual = uniqueWordsSlow(['Call', 'me', 'maybe', 'me'])
    return actual == expected


def appendToList(listOfWords, word):
    '''Takes as arguments a list of strings and a single string.  Builds and
    returns a new list made of the provided list's elements followed by the
    single string.  The provided list itself is left unchanged.'''
    # Wrap the word in a one-element list so it can be concatenated.
    extraEntry = [word]
    return listOfWords + extraEntry


def isInList(listOfWords, targetWord):
    '''Takes as arguments a list of strings and a single string.  Returns True
    if some element of the list is equal to the provided string; returns False
    otherwise (including when the list is empty).'''
    # any() stops at the first match, just like returning early from a loop.
    return any(entry == targetWord for entry in listOfWords)


def testIsInList():
    '''Test function for isInList().  Should return True.

    Checks one word that is present in the list and one word that is not.'''
    words = ['happy', 'birthday', 'to', 'you']
    # 'to' is in the list, so isInList() must report it as found.
    if not isInList(words, 'to'):
        return False
    # 'me' is not in the list, so isInList() must report it as missing.
    if isInList(words, 'me'):
        return False
    return True


# Two ways of iterating through a list:
wordList = ['twitter', 'facebook', "jadrian's website", 'cnn.com']

# The first way is the one we know already.  It lets you look at each element
# of the list on its own, one at a time.
print('Old way of iterating through a list...')

for word in wordList:
    print('\t' + word)

# The second way is considered the "iterating-by-index" way: the loop variable
# is a position in the list rather than an element.  It gives you extra
# powers, such as looking at the element just before the current one.
print('New way of iterating through a list...')

for index in range(0, len(wordList)):
    if index == 0:
        # The first element has nothing before it.
        print('\t' + wordList[index] + ' comes after nothing.')
    else:
        print('\t' + wordList[index] + ' comes after ' + wordList[index-1] + '.')


##########################################################################
#########################  ACTUAL HOMEWORK  ##############################
##########################################################################


def uniqueWordsFast(wordList):
    '''Takes a list of strings as an argument.  Returns a list of the unique
    entries of that list.  That is, all the following things are true:
      - Each element in the returned list occurs only once in that list.
      - Each element in the returned list is in the provided word list.
      - Each element in the provided word list is in the returned list.
    The returned list is in sorted order, and an empty word list gives an
    empty result.

    This function uses a fast algorithm: it sorts the words first, so that
    repeated words end up next to each other, and then makes a single pass
    over the sorted words.'''
    # The built-in sorted() function returns a sorted copy of the list, so the
    # list passed in by the caller is not modified.
    sortedWords = sorted(wordList)
    uniqueList = []
    for word in sortedWords:
        # Duplicates are adjacent after sorting, so a word is new exactly when
        # it differs from the most recently kept word.
        if not uniqueList or word != uniqueList[-1]:
            uniqueList.append(word)
    return uniqueList


def testUniqueWordsFast():
    '''Test function for uniqueWordsFast().  Should return True.

    The order of the returned list is not part of the documented contract, so
    results are sorted before being compared.'''
    # A repeated word must appear only once in the result.
    if sorted(uniqueWordsFast(['Call', 'me', 'maybe', 'me'])) != ['Call', 'maybe', 'me']:
        return False
    # A list that is already unique must keep all of its words.
    if sorted(uniqueWordsFast(['to', 'you'])) != ['to', 'you']:
        return False
    # A list made of one word repeated must collapse to that single word.
    if uniqueWordsFast(['me', 'me', 'me']) != ['me']:
        return False
    return True


def cleanUp(s):
    '''Takes a string as argument, and returns a cleaned-up version of it.

    The string is first converted to lowercase, then passed through
    removeNumbers(), and finally through removePunctuation().'''
    lowered = s.lower()
    return removePunctuation(removeNumbers(lowered))


def removeNumbers(s):
    '''Takes a string as argument, and returns another string of the same
    length in which every digit character ('0' through '9') has been replaced
    by a single space.  All other characters are kept as they are.'''
    digits = set('0123456789')
    # Build the pieces first and glue them together once at the end.
    return ''.join(' ' if symbol in digits else symbol for symbol in s)


def removePunctuation(s):
    '''Takes a string as argument, and returns another string,
    replacing punctuation marks with whitespaces.

    The characters treated as punctuation are the ASCII punctuation
    characters listed in string.punctuation.  Each one is replaced by a single
    space, so the returned string has the same length as the argument.  All
    other characters are kept as they are.
    '''
    # Imported here so that this function brings in everything it needs.
    import string

    punctuationMarks = set(string.punctuation)
    # Build the pieces first and glue them together once at the end.
    return ''.join(' ' if char in punctuationMarks else char for char in s)


def testRemovePunctuation():
    '''Test function for removePunctuation().  Should return True.'''
    # Each punctuation mark must be replaced by exactly one space.
    if removePunctuation('Hello, world!') != 'Hello  world ':
        return False
    # Punctuation in the middle of a word must be replaced too.
    if removePunctuation("don't") != 'don t':
        return False
    # A string without punctuation must come back unchanged.
    if removePunctuation('happy birthday') != 'happy birthday':
        return False
    return True


def averageWordLength(filename, minLength):
    '''Takes a filename (string) and a minimum word length (integer) as
    arguments.  Reads the file with readFile(), cleans the text with
    cleanUp(), splits it into words, and reduces those to unique words with
    uniqueWordsFast().  Returns the result of averageWordLengthInList() for
    those unique words and the given minimum length.'''
    rawText = readFile(filename)
    words = cleanUp(rawText).split()
    return averageWordLengthInList(uniqueWordsFast(words), minLength)


def averageWordLengthInList(wordList, minLength):
    '''Takes a word list (list of strings) and a minimum word length (integer)
    as arguments, and returns the average length of the words in list that are
    at least as long as this minimum.

    The average is returned as a floating-point number.  If no word in the
    list is long enough (for example, if the list is empty), there is nothing
    to average and 0.0 is returned.'''
    lengths = [len(word) for word in wordList if len(word) >= minLength]
    if not lengths:
        # Avoid dividing by zero when no word qualifies.
        return 0.0
    # float() forces true division rather than integer division.
    return sum(lengths) / float(len(lengths))


def testAverageWordLengthInList():
    '''Test function for averageWordLengthInList().  Should return True.'''
    # Only 'bb' (2) and 'cccc' (4) are long enough, so the average is 3.
    if averageWordLengthInList(['a', 'bb', 'cccc'], 2) != 3.0:
        return False
    # With a minimum of 1 every word counts: (1 + 2 + 3) / 3 is 2.
    if averageWordLengthInList(['a', 'bb', 'ccc'], 1) != 2.0:
        return False
    # A word exactly as long as the minimum must be included.
    if averageWordLengthInList(['a', 'bb'], 2) != 2.0:
        return False
    return True
