""" Week9-1 Notes """

# Assume the page is in a file
def mapper(f, target):
    """Report whether the page stored in file f contains the word target.

    The file is read with readFile and normalized with cleanFile, then
    split on single spaces. Each piece is compared to target exactly, so
    target should already be in the cleaned form (lowercase, no
    punctuation) to match.

    Prints a status line either way. On a hit, the printed number is the
    zero-based position of the first match in the space-split cleaned text.

    Returns True if target was found, False otherwise.
    """
    rawText = readFile(f)
    pieces = cleanFile(rawText).split(" ")
    for position, piece in enumerate(pieces):
        if piece == target:
            # Stop at the first hit; later occurrences don't matter.
            print("file", f, "found on word #", position)
            return True
    print("file", f, "didn't find it")
    return False

# If the word is on any page, return True
def reducer(lst):
    """Combine per-page results: True if any entry in lst equals True.

    Prints the list being checked before combining. Entries are compared
    with == True (not plain truthiness), so a value such as 2 does not
    count as a hit. An empty list gives False.
    """
    print("reducer is checking", lst)
    return any(outcome == True for outcome in lst)

### You don't need to understand the code under this line ###

# This reads a file into a string
def readFile(filename):
    """Return the entire contents of the text file filename as one string.

    The file is opened in text read mode with the platform's default
    encoding. Any error from open() or read() (for example
    FileNotFoundError for a missing file) propagates to the caller.
    """
    # The with-block guarantees the file is closed even if read() raises,
    # which a manual open()/close() pair does not.
    with open(filename, "r") as f:
        return f.read()

# This cleans the file by removing punctuation and making everything lowercase
def cleanFile(s):
    """Normalize text so it can be split into words on single spaces.

    Steps, in order:
      1. newlines become spaces;
      2. the punctuation characters . ! ? , : ; ( ) and both quote
         characters are deleted;
      3. each "--" (a typed dash) becomes a space;
      4. runs of spaces are collapsed to a single space;
      5. everything is lowercased.

    Leading or trailing spaces are collapsed but not stripped.

    Returns the cleaned string.
    """
    s = s.replace("\n", " ")
    for punctuation in ".!?,:;()":
        s = s.replace(punctuation, "")
    s = s.replace('"', "").replace("'", "")
    s = s.replace("--", " ")
    # Collapse spaces only AFTER the steps above: deleting a free-standing
    # punctuation mark ("a . b") or replacing "--" ("a -- b") can create new
    # runs of spaces, which would otherwise show up as empty "words" when
    # the result is split on " ".
    while "  " in s:
        s = s.replace("  ", " ")
    return s.lower()

# Two helper functions for the collector
def mapWrapper(tup):
    """Adapt mapper to take a single (filename, target) tuple.

    Returns a one-element list holding the pair ("found", result), where
    result is mapper's True/False answer for that file.
    NOTE(review): presumably the constant "found" key makes the MapReduce
    framework group every page's result together — confirm against the
    mapreduce module.
    """
    filename, target = tup
    wasFound = mapper(filename, target)
    return [("found", wasFound)]

def reduceWrapper(tup):
    """Adapt reducer to take a single tuple: pass its second item to reducer.

    NOTE(review): tup is presumably a (key, listOfResults) pair built by the
    MapReduce framework — confirm against the mapreduce module.
    """
    pageResults = tup[1]
    return reducer(pageResults)

# The collector organizes the MapReduce process
def collector(target):
    """Search data/chapter1.txt through data/chapter12.txt for target.

    Builds one (filename, target) job per chapter file, runs the jobs
    through SimpleMapReduce with mapWrapper and reduceWrapper, and returns
    the first element of what the process call gives back.
    NOTE(review): that first element is presumably the reducer's single
    True/False answer for the "found" key — confirm against mapreduce.
    """
    jobs = []
    for chapterNumber in range(1, 13):
        jobs.append((f"data/chapter{chapterNumber}.txt", target))

    # Imported here rather than at the top of the file, as in the original.
    from mapreduce import SimpleMapReduce
    runMapReduce = SimpleMapReduce(mapWrapper, reduceWrapper)

    return runMapReduce(jobs)[0]

# Only run the demo search when this file is executed directly.
if __name__ == "__main__":
    finalResult = collector("tea")
    print("Final result:", finalResult)