# Homework 2-4, CSCI 0931 2013

# Much of the code for this activity is provided for you.  Skeleton code is
# provided for the parts you must complete.  Specifically, I've defined some
# function signatures and descriptions.  The function signature is the line that
# begins with "def" and specifies the name of the function and its arguments.
# The function description comes right after the signature --- it's the string
# in triple quotes that says what the function does.
#
# Complete all the parts marked "TODO".  Fill in functions so that their
# behavior matches their description, and run some test cases to make sure.

# The next line is needed to provide built-in tools for cleaning up text and
# splitting it into words.  We'll discuss these tools, called "regular
# expressions" later in the semester.
import re

# Examples with dictionaries (pay attention to the use of [] and {}):

inventory = {}  # Curly braces with nothing inside make an empty dictionary.
inventory = {'orange': 5, 'apple': 3, 'banana': 5, 'mango': 4}
inventory['peach'] = 10  # Assigning to a new key adds that key and its value.
inventory['banana'] = 9  # Assigning to an existing key replaces its value.
numOfBananas = inventory['banana']  # Square brackets look up a key's value.

for k in inventory:  # Looping over a dictionary visits each of its keys.
    print(k)
    print(inventory[k])


# Your name here: TODO

# Here is a dictionary of movies with their box office grosses.
# The keys are movie titles (strings) and the values are domestic revenues,
# adjusted for inflation (integers).

# This dictionary is made so that you have some data to test your functions.
# Again, your functions should perform correctly on ANY input.
# Sample data: movie title -> domestic revenue (inflation-adjusted).
domesticGrosses = {
    'Snow White and the Seven Dwarfs': 868730000,
    'Avatar': 775124500,
    'Return of the Jedi': 749653500,
    'Jurassic Park': 687060600,
    'Gone with the Wind': 1610295700,
    'The Graduate': 682004000,
    'The Empire Strikes Back': 782499700,
    'Star Wars': 1419613200,
    'The Sound of Music': 1135050500,
    'Pirates of the Caribbean: Dead Mans Chest': 515088100,
    'The Lion King': 615537400,
    'The Godfather': 629012900,
    'Forrest Gump': 626008500,
    'The Lord of the Rings: The Return of the King': 490459000,
    'My Fair Lady': 478200000,
    'Harry Potter and the Sorcerers Stone': 445626500,
    'Spider-Man': 553793400,
    'The Dark Knight': 592028200,
    'Titanic': 1022916700,
    'E.T.': 1130579000,
}


def printMovieNames(movie_dict):
    '''Print the title of every movie in a movie-to-revenue dictionary.

    The titles (the dictionary's keys) are printed one per line, in the
    order the dictionary yields them.  Nothing is returned.
    '''
    for title in movie_dict.keys():
        print(title)

# Counterpart to printing the movie names: this one prints the revenues.
def printMovieRevenues(dict):
    '''Given a dictionary of movies and revenues, print out the revenues.

    dict -- maps movie titles to revenues.  (The parameter name hides the
            built-in dict type inside this function; it is kept so that
            existing callers keep working.)

    Each revenue (the dictionary's values) is printed on its own line, in
    the order the dictionary yields them.  Nothing is returned.
    '''
    for revenue in dict.values():
        print(revenue)
    return


def startsWithTHE(dict):
    '''Given a dictionary of movies and revenues, return a dictionary of all the
    movies whose titles start with the word 'The', and their revenues.

    dict -- maps movie titles (strings) to revenues.

    Returns a new dictionary; the input is not modified.  The match is
    case-sensitive and requires 'The' to be a whole word, so
    'The Godfather' is included but 'Them!' and 'Theodore Rex' are not.
    '''
    # r'The\b': re.match anchors at the start of the title, and \b demands
    # a word boundary right after the 'e', which rejects longer words that
    # merely begin with the letters T-h-e.
    return {title: revenue
            for title, revenue in dict.items()
            if re.match(r'The\b', title)}

def hugeMovies(dict):
    '''Given a dictionary of movies and revenues, return a dictionary of all the
    movies that grossed over one billion dollars.

    dict -- maps movie titles to revenues (numbers).

    Returns a new dictionary holding only the entries whose revenue is
    strictly greater than 1,000,000,000; the input is not modified.
    '''
    one_billion = 1000000000
    return {title: revenue
            for title, revenue in dict.items()
            if revenue > one_billion}


# Takes a movie-revenue dictionary as input and returns the title of the movie
# that had the highest revenue.  Conceptually this is the "keep track of the
# biggest one seen so far" loop:
#  1. Start with no biggest movie (and no biggest revenue) yet.
#  2. Look at every movie; whenever its revenue is bigger than the biggest
#     revenue seen so far, remember its revenue and its title instead.
#  3. After the last movie, the remembered title is the highest-grossing one.
# The built-in max() function performs exactly this scan for us.
def biggestMovie(dict):
    '''Given a dictionary of movies and revenues, return the title of the
    highest-grossing movie.

    dict -- maps movie titles to revenues (numbers).

    Returns the title with the largest revenue.  If several movies share
    the largest revenue, the first one the dictionary yields is returned.
    Returns None when the dictionary is empty.
    '''
    # key= makes max() compare titles by their revenue instead of
    # alphabetically; default= is what max() returns for an empty input.
    return max(dict, key=lambda title: dict[title], default=None)



def buildConcordance(text):
    '''Given a string, returns a dictionary that maps unique words in the string
    to lists of integers.  The integers in each list are positions of the
    occurrences of that word in the string.

    text -- the string to index.

    Words are maximal runs of "word" characters (the regular expression
    \\w+), so punctuation and whitespace separate words.  Keys are the
    lowercased words, which makes the concordance case-insensitive.  Each
    position is the 0-based character index in text where that occurrence
    starts; positions are listed in increasing order.  An empty string (or
    one with no words) gives an empty dictionary.
    '''
    concordance = {}    # Start with an empty dictionary.

    # Search the ORIGINAL text and lowercase each word afterwards.  For a few
    # Unicode characters lowercasing changes the length of the string, so
    # positions measured on a lowercased copy would not line up with text.
    for match in re.finditer(r"\w+", text):
        word = match.group(0).lower()   # The matched word, case-folded.
        position = match.start(0)       # Where the match begins in text.
        # setdefault creates the word's list the first time it is seen and
        # returns the existing list every time after that.
        concordance.setdefault(word, []).append(position)

    return concordance


def testBuildConcordance():
    '''Run three test cases against buildConcordance.

    Returns False as soon as one case fails; returns True if they all pass.
    '''
    # Case 1: an empty string contains no words.
    if buildConcordance('') != {}:
        return False

    # Case 2: a repeated word lists every position where it starts.
    if buildConcordance('a b a') != {'a': [0, 4], 'b': [2]}:
        return False

    # Case 3: words are lowercased and punctuation is not part of a word.
    if buildConcordance('Call me. call ME') != {'call': [0, 9], 'me': [5, 14]}:
        return False

    return True


def printConcordance(word, concord, text, context=30):
    '''Given a word, a concordance (dictionary) and the original text,
    prints all occurrences of the word with a little bit of context surrounding
    each occurrence.

    word    -- the word to look up; it is lowercased before the lookup, so
               'Call' and 'call' find the same entry.
    concord -- dictionary mapping lowercased words to lists of positions,
               where each position is the character index in text at which
               an occurrence of the word starts.
    text    -- the text the concordance was built from.
    context -- how many characters of text to show on each side of the word
               (default 30).

    Prints one line per occurrence.  Line breaks and runs of spaces inside
    the excerpt are collapsed to single spaces so every occurrence fits on
    one line.  Prints nothing if the word is not in the concordance.
    Returns None.
    '''
    for position in concord.get(word.lower(), []):
        # Clamp the window so it never runs off either end of the text.
        start = max(0, position - context)
        end = min(len(text), position + len(word) + context)
        excerpt = text[start:end]
        # split() with no arguments breaks on any whitespace (including
        # newlines) and drops empty pieces, so joining with ' ' tidies it up.
        print(' '.join(excerpt.split()))
    return


# The following string is for testing purposes.
test_text = '''Call me Ishmael.  Some years ago--never mind how long
precisely--having little or no money in my purse, and nothing particular
to interest me on shore, I thought I would sail about a little
and see the watery part of the world.
'''

def testPrintConcordance():
    '''Exercise printConcordance with five calls on test_text.

    printConcordance only prints, so its result cannot be checked
    programmatically.  Each call is therefore followed by print() calls
    showing the output that call is expected to produce; compare the two
    by eye.
    '''
    # Hand-built concordance for a few words of test_text: each number is the
    # character index in test_text where that occurrence of the word starts.
    concord = {'i': [152, 162],
               'little': [72, 183],
               'call': [0],
               'world': [221]}

    # 1. A word that occurs twice on the same line.
    printConcordance('i', concord, test_text, 10)
    print('Expected:')
    print('on shore, I thought I')
    print('I thought I would sai')

    # 2. A word whose second occurrence sits right before a line break.
    printConcordance('little', concord, test_text, 10)
    print('Expected:')
    print('y--having little or no mon')
    print('l about a little and see t')

    # 3. A capitalized query at the very start of the text.
    printConcordance('Call', concord, test_text, 10)
    print('Expected:')
    print('Call me Ishmae')

    # 4. A word at the very end of the text.
    printConcordance('world', concord, test_text, 10)
    print('Expected:')
    print('rt of the world.')

    # 5. A word that is not in the concordance prints nothing.
    printConcordance('whale', concord, test_text, 10)
    print('Expected: (no output)')

# Read something interesting from a file into s when you are ready to test on
# big texts.  For example, you could use something like the following line.
# s = open('C:\\WinData\\Desktop\\MobyDick.txt').read()

# Build a concordance of the sample text and show every occurrence of 'i'.
# (The functions are named buildConcordance / printConcordance; the
# underscore spellings are not defined anywhere in this file.)
conc = buildConcordance(test_text)
printConcordance('i', conc, test_text)














