import time
import numpy as np

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn

# Uncomment this to force THCudaCheck FAIL to throw an exception.
# torch.backends.cudnn.benchmark=True

# MNIST training split, stored under ./mnist_data (download=True fetches it
# when it is not there yet). ToTensor converts every image to a float tensor,
# so iterating the dataset yields (image tensor, label) pairs.
trainset = torchvision.datasets.MNIST(
    './mnist_data',
    transform=transforms.ToTensor(),
    download=True,
)

# Print the dataset summary.
print(trainset)

class OneConvLayer(nn.Module):
  def __init__(self, in_dim, out_dim, nkernels):
    super(OneConvLayer, self).__init__()
    self.network1 = nn.Sequential(
      nn.Conv2d(in_channels=1, out_channels=nkernels, kernel_size=5, stride=1, padding=2),
      nn.BatchNorm2d(nkernels),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2),
    )
    self.network2 = nn.Linear(nkernels*14*14, out_dim)


  def forward(self, x):
    out = self.network1(x)
    out = out.view(out.size(0), -1)
    out = self.network2(out)
    return out

# Train on the first CUDA device when one is available and fall back to the
# CPU otherwise, so the script also runs on machines without a GPU.
# To force CPU training, use: device = torch.device('cpu')
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('device is', device)

# Model hyper-parameters.
in_dim = 28 * 28   # pixels per image (passed to the model constructor)
out_dim = 10       # number of output scores, one per digit class
nkernels = 32      # number of convolution kernels

model = OneConvLayer(in_dim, out_dim, nkernels).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.005)

# Training schedule.
batchSize = 100
epochs = 15
print('Loading...')
# Reshuffle the training samples every epoch.
trainloader = torch.utils.data.DataLoader(dataset=trainset,  batch_size=batchSize, shuffle=True)
print('Loading complete.')

# Train for `epochs` passes over the data. After each epoch, print the summed
# loss, the wall-clock time and the training accuracy of that epoch.
for epoch in range(epochs):
  runningLoss = 0.0
  correct = 0
  now = time.time()
  for images, labels in trainloader:
    images = images.to(device)
    labels = labels.to(device)
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    # The criterion returns the batch mean; scale by the batch size so that
    # runningLoss accumulates the per-sample loss sum.
    runningLoss += loss.item() * labels.shape[0]

    # Count correct predictions for the whole batch at once. This works for
    # any batch length (including a shorter final batch) and needs a single
    # device-to-host transfer instead of one per sample.
    predictions = outputs.detach().argmax(dim=1)
    correct += (predictions == labels).sum().item()
  # end of epoch
  print('{:2}  loss = {:8.2f}   time = {:.2f} s   correct = {:.2f} %'.format(
        epoch, runningLoss, time.time()-now, correct/len(trainloader.dataset)*100))


import matplotlib.pyplot as plt

# Show the learned convolution kernels one at a time.
plt.figure()

# The first parameter of network1 is the convolution weight; with the model
# defined in this file it holds one 1x5x5 filter per kernel.
weights = next(model.network1.parameters()).detach().cpu()

# Iterate over the tensor itself so the loop always matches the number of
# kernels the model was built with.
for kernel in weights:
  plt.imshow(kernel.view(5,5))
  plt.show(block=False)
  input('Press Enter to proceed...')

