# CategoricalNB takes ints, not strings,
# so map each string to a number
def mapToIndex(flavor):
    """Return the integer code for a flavor category string.

    The code is the category's position in the fixed ordering below,
    so "chocolate" maps to 0 and "other" maps to 5.

    Raises ValueError if flavor is not one of the listed categories.
    """
    flavorOrder = [
        "chocolate",
        "coffee/tea",
        "cookie",
        "fruit",
        "vanilla",
        "other",
    ]
    position = flavorOrder.index(flavor)
    return position

# First, load the relevant data into
# a 2D list

# Header:
# Semester,
# #1 Orig,     #2 Orig,     #3 Orig,
# #1 Cleaned,  #2 Cleaned,  #3 Cleaned,
# #1 Category, #2 Category, #3 Category
import csv
# Read the whole CSV up front. The context manager guarantees the file
# is closed even if something below fails, and newline="" is what the
# csv module asks for so quoted fields containing line breaks are
# parsed correctly.
with open("all-icecream.csv", "r", newline="") as f:
    orig = list(csv.reader(f))

# Build one [semester, #1 code, #2 code, #3 code] entry per data row.
allData = []
for row in orig[1:]: # skip header
    entry = [row[0]] # include semester
    for j in range(7, 10): # include categories (columns 7-9)
        # mapToIndex raises ValueError on an unrecognized category
        entry.append(mapToIndex(row[j])) # but map to number first
    allData.append(entry)

# Second, separate data into train vs test sets

# Hold out the F23 semester as the test set; every other semester is
# used for training. Each record is [semester, #1, #2, #3]: the first
# two category codes are the model input and the third is the label.
trainInput, trainLabels = [], []
testInput, testLabels = [], []
for record in allData:
    heldOut = record[0] == "F23"
    # Read both values before appending so a malformed record leaves
    # the input and label lists the same length.
    firstTwo, third = record[1:3], record[3]
    inputBucket = testInput if heldOut else trainInput
    labelBucket = testLabels if heldOut else trainLabels
    inputBucket.append(firstTwo)
    labelBucket.append(third)

# Third, train the model and try it out!

from sklearn.naive_bayes import CategoricalNB

# The inputs are the six category codes produced by mapToIndex (0-5).
# By default CategoricalNB infers the number of categories per feature
# from the largest code seen during fit, so a code that only appears at
# predict/score time would raise IndexError. min_categories pins the
# count to the full set of six (requires scikit-learn >= 0.24).
model = CategoricalNB(min_categories=6)
model.fit(trainInput, trainLabels)

# Predict the #3 category code for a point whose #1 code is 0 and
# whose #2 code is 2.
print(model.predict([ [0, 2] ]))

# Test the model to see how it performs
# (score reports mean accuracy on the held-out F23 points).

print(model.score(testInput, testLabels))