# Hypothesis Testing for Word Frequencies
# Anna Ritz

# This program takes two toy lists of word frequencies, measures the
# distance between them, and performs a permutation test to determine
# whether that distance is statistically significant.

# need to import random
import random

def runPermutationTest():
    '''Runs the permutation test on two toy frequency lists and prints the result.

    Prints both lists and their distance as computed by compareTwo(), then
    calls runExperiments() to find the fraction of trials whose distance was
    at most the original distance.  That fraction is treated as the p-value
    and compared against the 0.05 threshold.
    INPUTS: none
    OUTPUTS: none (everything is printed)'''

    # Each print() below takes one pre-formatted string, so the output is the
    # same whether print is a statement (Python 2) or a function (Python 3).

    # the two original lists.
    list1 = [0.3,0.2,0.45,0.05]
    print('list1 = %s' % (list1,))
    list2 = [0.1,0.3,0.5,0.1]
    print('list2 = %s' % (list2,))
    origval = compareTwo(list1,list2)
    print('lists have a distance of %s' % origval)

    tot = 1000
    print('is that significant? Run permutation test %s times.' % tot)
    pvalue = runExperiments(list1,list2,origval,tot)

    # runExperiments() returns -1 when it rejects its inputs (and has already
    # printed why); do not let that sentinel pass the "< 0.05" check below.
    if pvalue < 0:
        return

    # pvalue is a fraction in [0, 1]; scale it before labelling it as a percent.
    print('%.1f%% of the random tests were at least as similar as orig list.'
          % (pvalue * 100))
    print('p-value = %s' % pvalue)
    if pvalue < 0.05:
        print('p-value is statistically significant')
    else:
        print('p-value is not statistically significant')

    return

def runExperiments(list1,list2,origval,tot):
    '''Runs the permutation tot times and reports how often the result is
    at least as similar as the original lists.

    INPUTS: two lists of numbers (same length), origval (float, the
            distance between the original lists), tot (positive integer,
            the number of trials)
    OUTPUTS: fraction (float between 0 and 1) of the tot trials in which
             permute() returned a value <= origval.  Returns -1 after
             printing an error if the lists differ in length or tot is
             not positive.'''

    if len(list1) != len(list2):
        print('ERROR! lists are not the same length!')
        return -1

    # zero trials would divide by zero below; a negative count is meaningless.
    if tot <= 0:
        print('ERROR! number of trials must be positive!')
        return -1

    count = 0
    for _ in range(tot):
        # if random lists are at least as similar as orig lists, count.
        if permute(list1,list2) <= origval:
            count += 1

    # float() keeps this a true division under Python 2 as well.
    return float(count)/tot

def permute(list1,list2):
    '''Given two lists, randomly shuffles a copy of each and then returns
    the result of compareTwo() on the shuffled copies.  The lists passed
    in are left in their original order.
    INPUTS: two lists of numbers
    OUTPUT: whatever compareTwo() returns for the shuffled copies'''

    # random.shuffle() reorders its argument in place, so shuffle copies
    # rather than scrambling the caller's lists as a side effect.
    shuffled1 = list(list1)
    shuffled2 = list(list2)
    random.shuffle(shuffled1)
    random.shuffle(shuffled2)

    return compareTwo(shuffled1,shuffled2)



def compareTwo(list1,list2):
    '''Given two lists, returns the mean absolute difference between
    their entries, compared position by position
    (from the authorship activity in class)
    INPUTS: two lists of numbers. They are the same length
    OUTPUTS: float'''
    total = 0.0
    # walk list1 and look up the matching position in list2
    for pos, left in enumerate(list1):
        total += abs(left - list2[pos])

    # normalize by the number of words
    return total / len(list1)


def testPermutation():
    '''Tests the permutation experiment on two identical lists.

    Prints the lists, their distance from compareTwo(), the expected
    probability, and the fraction that runExperiments() reports over
    1000 trials so the two can be compared by eye.
    INPUTS: none
    OUTPUTS: none (everything is printed)'''

    # Each print() below takes one pre-formatted string, so the output is the
    # same whether print is a statement (Python 2) or a function (Python 3).

    list1 = [0,0,1,0,0]
    print('list1: %s' % (list1,))
    list2 = [0,0,1,0,0]
    print('list2: %s' % (list2,))

    origval = compareTwo(list1,list2)
    print('Orig val is %s (I expect it to be 0.0)' % origval)

    # once both lists are shuffled, the distance is 0 only when the two 1s
    # land at the same index: 5 matching placements out of 5*5 = 25.
    print('I expect the probability of this happening is 5/25 = 0.2')
    frac = runExperiments(list1,list2,origval,1000)
    # frac is a fraction in [0, 1]; show it in that form and as a percent.
    print('%s of trials (%.1f%%) when calling runExperiments()' % (frac, frac * 100))
    return
