"""
This program creates a vocabulary list from a given text file and stores it in
a variable "vocab".

Starter code written collaboratively by the CSCI0931 class, 2015-03-10 
(you did something similar earlier). The function name has changed from 
noReplicates to uniqueWords, but the functionality is the same as before.
"""

def vocabSize(filename):
    '''Counts the distinct words in a text file.

    filename: name of the text file; it is handed unchanged to vocabulary().
    Returns the number of entries in the list that vocabulary() builds for
    that file.'''
    uniqueWords = vocabulary(filename)
    return len(uniqueWords)


def vocabulary(filename):
    '''Builds the vocabulary list for a text file.

    filename: name of the text file; it is handed unchanged to readFile().
    The text returned by readFile() is split on whitespace and the resulting
    words are de-duplicated with uniqueWordsSlow().
    Returns the list of unique words.'''
    return uniqueWordsSlow(readFile(filename).split())


def readFile(filename, directory='C:\\Users\\srgomez\\Desktop\\'):
    '''Returns (at most) the first 10,000 characters of a text file.

    filename:  name of the file (string), relative to directory.
    directory: folder that holds the file.  It is joined to filename by plain
               string concatenation, so it must end with a path separator.
               The default is the original hard-coded Desktop path.

    Raises whatever open() raises if the file cannot be opened
    (for example FileNotFoundError).'''
    # Change the default Desktop path to be the right one for your computer!
    # The with-statement closes the file even if reading fails part-way.
    with open(directory + filename, 'r') as fileHandle:
        # Ask for only the characters we need instead of loading the whole
        # file into memory and slicing it afterwards.
        return fileHandle.read(10000)


def uniqueWordsSlow(wordList):
    '''De-duplicates a list of strings, keeping the first occurrence of each.

    wordList: list of strings.
    Returns a new list for which all of the following hold:
      - No element appears in it more than once.
      - Every element of it comes from wordList.
      - Every element of wordList appears in it.
    The algorithm is intentionally slow: each incoming word is checked with
    isElementOf() against everything collected so far, so the number of
    comparisons can grow large.'''
    seenWords = []
    for candidate in wordList:
        if isElementOf(candidate, seenWords):
            # Already collected; nothing to add for this word.
            continue
        seenWords = appendToList(seenWords, candidate)
    return seenWords


def testUniqueWordsSlow():
    '''Test function for uniqueWordsSlow().  Checks a single case (a list with
    one repeated word) and returns True when the result is as expected.'''
    expected = ['Call','me','maybe']
    actual = uniqueWordsSlow(['Call','me','maybe','me'])
    return actual == expected


def appendToList(listOfWords, word):
    '''Builds a new list consisting of listOfWords followed by word.

    listOfWords: list of strings; it is not modified.
    word:        the string to place at the end.
    Returns the new, longer list.'''
    tail = [word]
    extended = listOfWords + tail
    return extended


def isElementOf(targetWord,listOfWords):
    '''Reports whether targetWord occurs in listOfWords.

    targetWord:  the string to look for.
    listOfWords: the list to search.
    Returns True as soon as an element equal to targetWord is found, and
    False if the whole list is scanned without a match.'''
    return any(word == targetWord for word in listOfWords)


def testElementOf():
    '''Test function for isElementOf().  Tries two cases (a word that is in
    the list and a word that is not) and returns True when both behave as
    expected.'''
    lyrics = ['happy', 'birthday', 'to', 'you']
    # Each pair is (word to look up, answer isElementOf() should give).
    cases = [('to', True), ('me', False)]
    for target, expected in cases:
        if isElementOf(target, lyrics) != expected:
            return False
    return True


# Two ways of iterating through a list:
wordList = ['twitter', 'facebook', "steve's website", 'cnn.com']

# The first way is the one we know already.  The loop variable takes on each
# element of the list in turn, one at a time.
print('Old way of iterating through a list...')

for word in wordList:
    print('\t' + word)

# The second way is the "iterating-by-index" way: the loop variable is a
# position in the list rather than an element.  It gives you extra powers,
# such as looking at the neighbouring element at index-1.
print('New way of iterating through a list...')

for index in range(len(wordList)):
    if index == 0:
        # The first element has no predecessor to report.
        print('\t' + wordList[index] + ' comes after nothing.')
    else:
        print('\t' + wordList[index] + ' comes after ' + wordList[index-1] + '.')


##########################################################################
####################  ACTUAL HOMEWORK from 2-5  #########################
##########################################################################


def uniqueWordsFast(wordList):
    '''Takes a list of strings as an argument.  Returns a list of the unique
    entries of that list.  That is, all the following things are true:
      - Each element in the returned list occurs only once in that list.
      - Each element in the returned list is in the provided word list.
      - Each element in the provided word list is in the returned list.
    The returned list is in sorted order, and the provided list is not
    modified.

    This function uses a fast algorithm.

    Task 3.2: Once the words are sorted, all copies of the same word sit next
    to each other.  So each word only needs to be compared with the most
    recently kept word (one comparison per word) instead of with every word
    kept so far, as uniqueWordsSlow() does.
    '''
    # The built-in sorted() function returns a sorted copy of the list, so the
    # caller's list is left untouched.
    sortedWords = sorted(wordList)

    # Start a list of uniques with the first element already added
    # unless you have an empty list of words as input
    if len(sortedWords) == 0:
        return []
    uniqueList = [sortedWords[0]]

    # Duplicates are adjacent after sorting, so a word is new exactly when it
    # differs from the last word we kept.
    for word in sortedWords[1:]:
        if word != uniqueList[-1]:
            uniqueList.append(word)

    return uniqueList


def testUniqueWordsFast():
    '''Test function for uniqueWordsFast().  Tries three cases and returns
    True only if all of them pass.  Results are sorted before comparing, so
    the test checks which words are returned rather than their order.'''
    # Case 1: an empty list has no unique words.
    if uniqueWordsFast([]) != []:
        return False

    # Case 2: a repeated word that is not adjacent in the input must be
    # reported only once.
    if sorted(uniqueWordsFast(['Call','me','maybe','me'])) != ['Call','maybe','me']:
        return False

    # Case 3: several words, each repeated more than once and out of order.
    if sorted(uniqueWordsFast(['b','a','b','a','b'])) != ['a','b']:
        return False

    return True



#### This is the end of part 2-5!
#### Save everything below this area for 2-6.
##########################################################################
####################  ACTUAL HOMEWORK from 2-6  #########################
##########################################################################


def cleanUp(s):
    '''Normalizes a string for word counting.

    s: the string to clean.
    The string is lowercased, then passed through removeNumbers() and
    removePunctuation(), in that order.
    Returns the cleaned-up string.'''
    return removePunctuation(removeNumbers(s.lower()))


def removeNumbers(s):
    '''Takes a string as argument, and returns another string in which every
    digit character ('0' through '9') has been replaced with a single space.
    All other characters are left unchanged, so the result has the same
    length as the input.'''
    # Map each of the ten digits to a space and let translate() apply the
    # mapping in one pass, rather than comparing character by character and
    # growing the result with repeated string concatenation.
    digitsToSpaces = str.maketrans('0123456789', ' ' * 10)
    return s.translate(digitsToSpaces)


def removePunctuation(s):
    '''Takes a string as argument, and returns another string in which every
    punctuation mark has been replaced with a single space.

    "Punctuation mark" means any character in string.punctuation (the ASCII
    punctuation characters such as . , ! ? ' " - and so on).  All other
    characters are left unchanged, so the result has the same length as the
    input.
    '''
    # Imported here so this function works without touching the file's
    # top-level imports.
    import string

    # Map every punctuation character to a space, then apply the mapping to
    # the whole string in one pass.
    punctuationToSpaces = str.maketrans(string.punctuation,
                                        ' ' * len(string.punctuation))
    return s.translate(punctuationToSpaces)


def testRemovePunctuation():
    '''Test function for removePunctuation().  Tries three cases and returns
    True only if all of them pass.'''
    # Case 1: punctuation mixed in with words; each mark becomes one space.
    if removePunctuation('hello, world!') != 'hello  world ':
        return False

    # Case 2: a string without any punctuation must come back unchanged.
    if removePunctuation('no punctuation here') != 'no punctuation here':
        return False

    # Case 3: a string made only of punctuation becomes only spaces.
    if removePunctuation('?!.') != '   ':
        return False

    return True

# Run the homework test functions when the file is loaded and report each
# result (True means the test passed).
print("Run tests for uniqueWordsFast and removePunctuation")

print('\ttestUniqueWordsFast(): ' + str(testUniqueWordsFast()))

print('\ttestRemovePunctuation(): ' + str(testRemovePunctuation()))


##########################################################################
##########################  EXTRA CREDIT  ################################
##########################################################################


def averageWordLength(filename, minLength):
    '''Computes the average length of the longer unique words in a text file.

    filename:  name of the text file (string); handed unchanged to readFile().
    minLength: minimum word length (integer) for a word to be counted.
    The text is cleaned with cleanUp(), split on whitespace, de-duplicated
    with uniqueWordsFast(), and the result is passed to
    averageWordLengthInList() together with minLength.
    Returns whatever averageWordLengthInList() returns.'''
    rawText = readFile(filename)
    words = cleanUp(rawText).split()
    return averageWordLengthInList(uniqueWordsFast(words), minLength)


def averageWordLengthInList(wordList, minLength):
    '''Takes a word list (list of strings) and a minimum word length (integer)
    as arguments, and returns the average length of the words in the list
    that are at least as long as this minimum.

    Returns a float.  If no word in the list is long enough (including when
    the list is empty), there is nothing to average and 0.0 is returned.'''
    # Keep only the lengths of the words that meet the minimum.
    lengths = [len(word) for word in wordList if len(word) >= minLength]

    # Guard against dividing by zero when no word qualifies.
    if not lengths:
        return 0.0

    return sum(lengths) / len(lengths)

def testAverageWordLengthInList():
    '''Test function for averageWordLengthInList().  Tries three cases and
    returns True only if all of them pass.  Averages are compared with a small
    tolerance because they are floating-point numbers.'''
    tolerance = 1e-9

    # Case 1: every word meets the minimum, so all of them are averaged.
    if abs(averageWordLengthInList(['ab', 'abcd'], 1) - 3.0) > tolerance:
        return False

    # Case 2: words shorter than the minimum must be left out of the average.
    if abs(averageWordLengthInList(['a', 'bb', 'cccc'], 2) - 3.0) > tolerance:
        return False

    # Case 3: a word exactly as long as the minimum must be counted.
    if abs(averageWordLengthInList(['abc', 'abcde'], 3) - 4.0) > tolerance:
        return False

    return True
