# Monte Carlo simulation to calculate hash collision rate for random passwords
# using the Midterm 2 password hash function

import string
import random

# Hash the string s by multiplying the ascii values of each char
def myHash(s):
    myHashValue = 1
    for c in s:
        myHashValue = myHashValue * ord(c)
    return myHashValue

# Create one random password
def makeRandomPassword():
    randomLength = random.randint(8, 11)  # Avg. passwords are 8-11 chars
    password = ''
    # For a password of random length...
    for i in range(randomLength):
        # ...add random printable characters
        password = password + random.choice(string.printable)
    return password

# Simulate a batch of passwords and calculate the collision rate
def simulateCollisionRate(numPasswords):
    # Keep track of what we've seen before
    """
    (Note: We haven't seen sets in 110! Sets are like dictionary keys;
           they cannot contain duplicates or mutable items, but they're
           fast!  We don't recommend using them in 110, but they make this
           code speedier.)
    """
    uniqueHashes = set()
    uniquePasswords = set()
    for p in range(numPasswords):
        # Make a new password
        thisPassword = makeRandomPassword()

        # Make a different one if we've seen this password before
        while thisPassword in uniquePasswords:
            thisPassword = makeRandomPassword()

        # Keep track of this password and its hash value
        uniquePasswords.add(thisPassword)
        thisHash = myHash(makeRandomPassword())
        uniqueHashes.add(thisHash)
    
    # Note: This works because the uniqueHashes set will not contain duplicates
    return (numPasswords-len(uniqueHashes))/numPasswords

# Standard Monte Carlo simulation!
def getExpectedValue(numPasswords, trials):
    results = []
    for t in range(trials):
        results.append(simulateCollisionRate(numPasswords))
    return sum(results)/len(results)

# Let's run several simulations at different population sizes
def testPopulationSize(minPop, maxPop, trials):
    print("Population | Collision Rate")
    population = minPop
    while population <= maxPop:
        rate = getExpectedValue(population, trials)
        print(population, "|", rate*100, "%")
        population = population * 10

# Test groups of 100 passwords up to groups of 100k passwords
# Run 10 trials each
testPopulationSize(100, 100000, 10)



