"""
Learning Goals:
 - Identify three major categories of reasoning used with machine learning – classification, regression, and clustering – and decide which is the best fit for a problem
 - Given a dataset, identify categorical, ordinal, and numerical features which may help predict the correct output for a given input
 - Identify how training data, validation data, and testing data are used in machine learning to produce an accurate reasoner and measure its performance
"""

import csv

# Load the data: the first two columns of every CSV row are parsed as floats
# and stored as a two-element list in data.
data = []
# The with-block closes the file even if a row fails to parse; newline="" is
# what the csv module asks for when it is handed an open file object.
with open("heights.csv", "r", newline="") as f:
    reader = csv.reader(f)
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line (for example a
            # trailing newline at the end of the file); there is nothing to parse.
            continue
        rowAsNums = [float(row[0]), float(row[1])]
        data.append(rowAsNums)

# Generate the reasoning model: fit the line y = m*x + b to the rows of data,
# treating row[0] as x and row[1] as y, using the closed-form least-squares
# formulas built from four running sums.
n = len(data)
sumX = sumY = sumXY = sumXSquared = 0
for x, y in data:
    sumX += x
    sumY += y
    sumXY += x * y
    sumXSquared += x**2

# Slope first, because the intercept is expressed in terms of it.
# NOTE: with an empty data list the denominator is 0 and this division raises
# ZeroDivisionError.
slopeNumerator = n*sumXY - sumX*sumY
slopeDenominator = n*sumXSquared - sumX**2
m = slopeNumerator / slopeDenominator
b = (sumY - m*sumX) / n
print("m:", m)
print("b:", b)

# Run the model on new inputs: evaluate the fitted line m*x + b at each input
# and print the result next to its label.
for label, newInput in (("7:", 7), ("17:", 17)):
    print(label, m*newInput + b)