"""
Learning Goals:
 - Recognize problems that arise in multiprocessing, such as the difficulty of designing concurrent programs and the risk of deadlock
 - Create pipelines that make repeated operations more efficient by executing their sub-steps at the same time
 - Use the MapReduce pattern to design and code parallelized algorithms for distributed computing
"""


# The mapper reads one file (a "page" of text) into a string and checks it for the target word
def mapper(f, target):
    s = cleanFile(readFile(f))
    words = s.split(" ")
    for i, word in enumerate(words):
        if word == target:
            print("file", f, "found it on word #", i)
            return True
    print("file", f, "didn't find it")
    return False

# If any mapper found the word in its file, return True
def reducer(lst):
    print("reducer is checking the list:", lst)
    for result in lst:
        if result:
            return True
    return False
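
# Equivalently, this reduction could be written with Python's built-in any():
#     def reducer(lst):
#         return any(lst)
# The explicit loop is kept above so that each step is visible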


### You don't need to understand the code under this line ###

# This reads a file into a string
def readFile(filename):
    with open(filename, "r") as f:
        return f.read()

# This cleans the file by removing punctuation and making everything lowercase
def cleanFile(s):
    s = s.replace("\n", " ")
    s = s.replace("--", " ")
    for punctuation in ".!?,:;()":
        s = s.replace(punctuation, "")
    s = s.replace('"', "").replace("'", "")
    # Collapse repeated spaces last, since replacing dashes and removing
    # punctuation above can leave new double spaces behind
    while "  " in s:
        s = s.replace("  ", " ")
    s = s.lower()
    return s
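
# For example (a made-up sample string, not from the data files):
#     cleanFile('Tea, Earl Grey -- hot!\n"Make it so."')
# returns 'tea earl grey hot make it so'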

# Two helper functions that adapt mapper and reducer to the interface the
# MapReduce framework expects: the map step emits (key, value) pairs, and
# the reduce step receives one (key, list_of_values) pair per key
def mapWrapper(tup):
    # Unpack one (filename, target) task and emit a single ("found", bool) pair
    (filename, target) = tup
    return [("found", mapper(filename, target))]

def reduceWrapper(tup):
    # tup is ("found", [bool, ...]); reduce the list of mapper results
    return reducer(tup[1])
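
# The mapreduce module imported by the collector below comes with the course
# files and is not shown here. The class below is only a sketch of what such
# a module might contain (an assumption, modeled on a common
# multiprocessing.Pool design, and named differently so it does not shadow
# the real import): map in parallel, group the emitted (key, value) pairs
# by key, then reduce each group.
import collections
import itertools
import multiprocessing

class SimpleMapReduceSketch:
    def __init__(self, mapFunc, reduceFunc, numWorkers=None):
        self.mapFunc = mapFunc
        self.reduceFunc = reduceFunc
        self.numWorkers = numWorkers

    def partition(self, mappedValues):
        # Group all (key, value) pairs by key
        groups = collections.defaultdict(list)
        for key, value in mappedValues:
            groups[key].append(value)
        return list(groups.items())

    def __call__(self, inputs):
        with multiprocessing.Pool(self.numWorkers) as pool:
            # Each call to mapFunc returns a list of (key, value) pairs
            mapResponses = pool.map(self.mapFunc, inputs)
            partitioned = self.partition(itertools.chain(*mapResponses))
            # Each call to reduceFunc gets one (key, [values...]) pair
            return pool.map(self.reduceFunc, partitioned)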

# The collector organizes the MapReduce process
def collector(target):
    # Build one (filename, target) task per chapter file (chapters 1-12)
    files = [ ("data/chapter" + str(n) + ".txt", target) for n in range(1, 13) ]

    # Run MapReduce on the tasks; every mapper emits the same key, so the
    # reduce step produces a single value, which is the only entry in the list
    from mapreduce import SimpleMapReduce
    process = SimpleMapReduce(mapWrapper, reduceWrapper)
    result = process(files)[0]
    return result

if __name__ == '__main__':
    print("Final result:", collector("tea"))