# Code implemented for CS931, Spring 2015
# Lesson: Twitter and While Loops
# TO USE THIS FILE, YOU SHOULD FILL YOUR AUTH DATA - REFER TO THE CLASS SLIDES

import tweepy
import codecs
 
# Consumer keys and access tokens, used for authentication.
# Fill these in with your own Twitter app credentials (see the class
# slides); the script cannot talk to Twitter while they are empty.
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
 
# Uses the authentication info above and returns an initialized Tweepy API object
def initialize():
	'''Builds an OAuth handler from the module-level credentials and
	   returns a ready-to-use Tweepy API object.'''
	handler = tweepy.OAuthHandler(consumer_key, consumer_secret)
	handler.set_access_token(access_token, access_token_secret)
	return tweepy.API(handler)

# Searches for tweets based on a specific term
# Searches for tweets based on a specific term
def getTweetsSearch(term, quantity, location):
	'''Returns a list of up to *quantity* tweets that match *term*,
	   around a specified *location*.
	   INPUTS: term:     search term (e.g. "exams")
	           quantity: number of requested tweets (e.g. 500)
	           location: location/radius in the form 'lat,lng,radius' (e.g. "41.8262,-71.4032,10mi")
	                     or an empty string to accept any location.
	   OUTPUTS: a list of tweets, with size smaller or equal than quantity
	   string * int * string -> list(tweet)'''
	# Refuse very large requests: Twitter rate-limits clients that pull
	# too many tweets too fast.
	if quantity > 3000:
		print("If you ask for too many tweets too fast, Twitter may block you.")
		print("I'll be cautious and not give you that. If you don't like this behavior, remove this part of the code, and run again.")
		return []

	print("Getting tweets related to", term)
	print("Quantity:", quantity)
	print("Location:", location)

	# First request. Twitter returns at most 100 tweets per call, so we
	# will page through results below until we have *quantity* of them.
	print("Getting 100 tweets...")
	results = list(api.search(q=term, count=100, geocode=location))
	print("got", len(results))

	# Nothing matched: we are done.
	if not results:
		return []

	# Page backwards (towards older tweets) until we have enough or
	# Twitter stops returning results. Passing max_id = minID - 1
	# guarantees each request only returns tweets strictly older than
	# everything we already have (tweet IDs are positive and increasing).
	while len(results) < quantity:
		minID = min(tweet.id for tweet in results)

		print("Getting 100 tweets...")
		tempResults = api.search(q=term, count=100, geocode=location, max_id=(minID - 1))
		print("got", len(tempResults))

		# No older tweets available: stop paging.
		if not tempResults:
			break

		# Accumulate the new tweets in the result variable
		results = results + list(tempResults)

	# Return at most *quantity* tweets.
	return results[0:quantity]

# Queries a specific user's timeline
# Queries a specific user's timeline
def getTweetsTimeline(user, quantity):
	'''Returns a list of up to *quantity* tweets from *user*'s timeline.
	   INPUTS: user:     the username associated with the timeline (e.g. 'BrownUniversity')
	           quantity: number of requested tweets (e.g. 500)
	   OUTPUTS: a list of tweets, with size smaller or equal than quantity
	   string * int -> list(tweet)'''
	# Refuse very large requests: Twitter rate-limits clients that pull
	# too many tweets too fast.
	if quantity > 3000:
		print("If you ask for too many tweets too fast, some Twitter user may block you.")
		print("I'll be cautious and not give you that. If you don't like this behavior, remove this part of the code, and run again.")
		return []

	print("Getting tweets from user", user)
	print("Quantity:", quantity)

	# First request. Twitter returns at most 100 tweets per call, so we
	# will page through results below until we have *quantity* of them.
	print("Getting 100 tweets...")
	results = list(api.user_timeline(screen_name=user, count=100))
	print("got", len(results))

	# Nothing received: we are done.
	if not results:
		return []

	# Page backwards (towards older tweets) until we have enough or
	# Twitter stops returning results. Passing max_id = minID - 1
	# guarantees each request only returns tweets strictly older than
	# everything we already have (tweet IDs are positive and increasing).
	while len(results) < quantity:
		minID = min(tweet.id for tweet in results)

		print("Getting 100 tweets...")
		tempResults = api.user_timeline(screen_name=user, count=100, max_id=(minID - 1))
		print("got", len(tempResults))

		# No older tweets available: stop paging.
		if not tempResults:
			break

		# Accumulate the new tweets in the result variable
		results = results + list(tempResults)

	# Return at most *quantity* tweets.
	return results[0:quantity]
 
# Transform tweet objects into dictionaries. This function is not really necessary for anything
# except that dictionaries are more familiar than tweet objects.
def _cleanField(text):
	'''Replaces quotes, commas and line breaks in *text* with spaces,
	   so the value is safe to embed as a single CSV field.'''
	for ch in ("'", '"', ",", "\r", "\n"):
		text = text.replace(ch, " ")
	return text

def processTweets(listResults):
	'''Receives a list of tweets, and returns a list of dictionaries.
	   Each dictionary represents a tweet in a more 'standard' form for CS931.
	   Newlines and commas are substituted by spaces in the tweet (making the text suitable for a CSV file).

	   list(tweet) -> list(dict)'''
	listTweets = []
	for result in listResults:
		# Only the free-text fields (tweet text and user location) need
		# cleaning; the other fields are structured values.
		tweet = {'text': _cleanField(result.text),
		         'user': result.user.screen_name,
		         'created_at': result.created_at,
		         'source': result.source,
		         'time_zone': result.user.time_zone,
		         'location': _cleanField(result.user.location),
		         'coordinates': result.coordinates,
		         'retweet_count': result.retweet_count,
		         'favorite_count': result.favorite_count}
		listTweets.append(tweet)

	return listTweets

# Given a tweet, print it in the screen
def printTweet(tweet):
	'''Pretty-prints one processed tweet dictionary (as produced by
	   processTweets) to standard output.'''
	print(tweet['text'])
	print("user:", tweet['user'])
	print("created at:", tweet['created_at'])
	print("time zone:", tweet['time_zone'])
	print("location:", tweet['location'])

	point = tweet['coordinates']
	if point:
		# Longitude at index 0, latitude at index 1 (weird...).
		# The values are numbers, so they must be converted with str()
		# before concatenation -- the previous code raised a TypeError
		# here for any tweet that actually carried coordinates.
		print("coordinates:", str(point['coordinates'][1]) + "," + str(point['coordinates'][0]))

	print("retweet count: ", tweet['retweet_count'])
	print("favorite count: ", tweet['favorite_count'])
	print("\n")

# Given a tweet, print it in the form of a CSV-file row.
def printTweetCSV(csvFile, tweet):
	'''Writes one processed tweet dictionary to *csvFile* as a single
	   comma-separated row terminated by a newline.'''
	point = tweet['coordinates']
	if point:
		# Longitude at index 0, latitude at index 1 (weird...)
		coords = "(" + str(point['coordinates'][1]) + "/" + str(point['coordinates'][0]) + ")"
	else:
		coords = "()"

	# Assemble all eight columns, then join them in one pass.
	fields = [tweet['text'],
	          tweet['user'],
	          str(tweet['created_at']),
	          str(tweet['time_zone']),
	          tweet['location'],
	          coords,
	          str(tweet['retweet_count']),
	          str(tweet['favorite_count'])]

	csvFile.write(",".join(fields) + "\n")


################
# MAIN PROGRAM #
################

# Initialize the API
api = initialize()

# Queries a user's timeline (default example is Brown University's timeline)
listTweetsT = getTweetsTimeline("BrownUniversity", 500)

# Generate a CSV file for the user's timeline.
# codecs.open lets us write a UTF-8 file, which accounts for the foreign
# characters tweets often contain. The *with* statement guarantees the
# file is closed even if a write fails midway (the previous code leaked
# the handle on error).
listTweetsProcessedT = processTweets(listTweetsT)
with codecs.open("user.csv", "w", "utf-8") as csvFileT:
	for tweet in listTweetsProcessedT:
		printTweetCSV(csvFileT, tweet)
		#printTweet(tweet)

# Searches for a term (default example is 200mi around Boston)
listTweetsS = getTweetsSearch("exams", 500, "42.3601,-71.0589,200mi")

# Generate a CSV file for the search term (same UTF-8 handling as above).
listTweetsProcessedS = processTweets(listTweetsS)
with codecs.open("term.csv", "w", "utf-8") as csvFileS:
	for tweet in listTweetsProcessedS:
		printTweetCSV(csvFileS, tweet)
		#printTweet(tweet)
