function [hyperParams] = chooseHyperParams(X, Y)
% Picks the kernel bandwidth (h) and scale (sigma) by maximizing the value
% returned by normalizedMargLikelihood over a grid of candidate pairs.
%
% Inputs:
%   X - inputs, one point per row (size(X, 1) is used as the number of
%       points by normalizedMargLikelihood).
%   Y - observed targets. NOTE(review): presumably a column vector with one
%       entry per row of X, so that std(Y) and mean(Y) are scalars - confirm
%       against callers.
%
% Output:
%   hyperParams - struct with fields
%     priorMean : mean(Y), subtracted from Y before fitting
%     eta       : noise level, 0.01 * std(Y)
%     h, sigma  : the best-scoring candidate pair
%     L, alpha  : the factor and solve vector returned by
%                 normalizedMargLikelihood for that pair
%
% NOTE: if no candidate yields a score greater than -inf (e.g. every score
% is NaN), the fields h, sigma, L and alpha are left unset.

  % Candidate values: numCands log-spaced multipliers in [0.1, 10] applied to
  % a data-dependent base value for each hyperparameter.
  numCands = 10;
  hCands = logspace(-1, 1, numCands)' * norm(std(X));
  sigmaCands = logspace(-1, 1, numCands)' * std(Y);

  % Full Cartesian product of the candidates: numCands^2 rows of [h, sigma].
  % h varies fastest, sigma slowest. (Tiling both vectors in the same order
  % would only pair hCands(i) with sigmaCands(i).)
  [hGrid, sigmaGrid] = ndgrid(hCands, sigmaCands);
  cands = [hGrid(:) sigmaGrid(:)];

  % Set the mean value and centre the targets around it.
  priorMean = mean(Y);
  hyperParams.priorMean = priorMean;
  y = Y - priorMean;
  % Set the noise level.
  eta = 0.01 * std(Y);
  hyperParams.eta = eta;

  % Evaluate every pair and keep the one with the largest score. The strict
  % comparison keeps the earliest candidate on ties.
  bestScore = -inf;
  for i = 1:size(cands, 1)
    h = cands(i, 1);
    sigma = cands(i, 2);
    [score, L, alpha] = normalizedMargLikelihood(h, sigma, X, y, eta);
    if score > bestScore
      bestScore = score;
      hyperParams.h = h;
      hyperParams.sigma = sigma;
      hyperParams.L = L;
      hyperParams.alpha = alpha;
    end
  end

end

function [nlml, L, alpha] = normalizedMargLikelihood(h, sigma, X, y, eta)
% Scores one (h, sigma) pair on the data (X, y); larger values are better
% for the caller, which maximizes this quantity.
%
% Inputs:
%   h, sigma - kernel hyperparameters, forwarded to GaussKernel
%   X        - inputs, one point per row
%   y        - targets (the caller passes them already mean-centred)
%   eta      - value added to the diagonal of the kernel matrix
%
% Outputs:
%   nlml  - -1/2 * y' * alpha - sum(log(diag(L))) - n/2 * log(2*pi).
%           NOTE(review): this is the Gaussian log marginal likelihood (not
%           its negative, despite the name) provided stableCholesky returns
%           a lower-triangular L with K = L * L' - confirm.
%   L     - factor returned by stableCholesky for the regularized kernel
%   alpha - L' \ (L \ y)

  numPts = size(X, 1);

  % Kernel matrix of X against itself, with eta on the diagonal.
  gram = GaussKernel(X, X, sigma, h);
  L = stableCholesky(gram + eta * eye(numPts));

  % Two successive solves against the factor and its transpose.
  alpha = L' \ (L \ y);

  % The three terms of the score, combined in the same order as written.
  dataFit = -1/2 * y' * alpha;
  logDiagSum = sum(log(diag(L)));
  normConst = numPts/2 * log(2*pi);
  nlml = dataFit - logDiagSum - normConst;
end

