% d = samplepost(xs, ys, zs, cx, cy, cz, wts, numreal)
%
% We are given some observed track candidates at positions (xs, ys,
% zs), along with their estimated reliabilities (wts, in [0,1]).  We
% believe that numreal of the tracks are "real" (i.e., near previously
% observed tracks (cx, cy, cz) and having high reliability scores);
% the rest are either "noise" (generated at random locations with
% random reliability) or "extra" (generated near real or noise tracks
% with random reliability).
%
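% This function runs an MCMC chain over the posterior distribution of
% explanations for the data and, as currently written, returns the
% most probable explanation visited by the chain rather than a sample
% (see the note at the end of samplepost).  An explanation is a
% vector d, the same length as the data, each of whose elements is one
% of the following: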
%   - a positive number, meaning "real"
%   - zero, meaning "noise"
%   - a negative number, meaning "extra"
%
% There will be exactly one "real" point with each index in 1:numreal.
% These points will be penalized for distance from the corresponding
% element of (cx, cy, cz).  The "extra" data points are associated
% with "real" or "noise" points, so that an "extra" point i with
% d(i)=-11 is penalized for its distance from point 11 (which must not
% be extra).

%    Copyright (C) 2005
%    Geoff Gordon  ggordon@cs.cmu.edu
%    Andrew Gove
%
%    This file is part of DotTrack, dot tracking software for
%    fluorescence microscope images.
%
%    DotTrack is free software; you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published
%    by the Free Software Foundation; either version 2 of the License,
%    or (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
%    General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program; if not, write to the Free Software
%    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
%    02110-1301 USA

function d = samplepost(xs, ys, zs, cx, cy, cz, wts, numreal)
% SAMPLEPOST  Search the posterior over explanations of track candidates.
%
%   d = samplepost(xs, ys, zs, cx, cy, cz, wts, numreal)
%
%   xs, ys, zs   coordinates of the n observed track candidates
%   cx, cy, cz   coordinates of the previously observed tracks; element
%                k is the reference location for the real point labelled k
%   wts          reliability score of each candidate
%   numreal      number of candidates to label as "real" (must be <= n)
%
%   Returns an n-by-1 vector d: d(i) > 0 labels i as real track d(i),
%   d(i) == 0 labels i as noise, and d(i) == -j labels i as an extra
%   hanging off point j (j is always real or noise, never extra).
%
%   All probabilities are handled as logarithms.  The density is a
%   product of n small factors (3e-4 per noise point), which underflows
%   to zero in double precision once n reaches roughly 90 and would make
%   the acceptance ratio NaN.

% parameters
steps = 3000;
%steps = 5000;
n = length(xs);

if (numreal > n)
  error('samplepost:numreal', ...
        'numreal (%d) exceeds the number of observations (%d)', ...
        numreal, n);
end

% initialize so that random tracks are assigned to be real, and all
% others noise
d = zeros(n,1);
if (n == 0)				% nothing to explain
  return
end
perm = randperm(n);
d(perm(1:numreal)) = 1:numreal;
lp = logdensity(d, xs, ys, zs, cx, cy, cz, wts);

% keep track of most probable d found so far
bestlp = lp;
bestd = d;
beststep = 0;				% only used by the debug print below

for step = 1:steps

  % Proposal distribution: pick a random index.  If it's currently
  % explained as a "real" (>0) observation, swap explanations with a
  % random other index.  If it's currently "noise" (=0) with some
  % "extra"s hanging off of it, swap with one of the "extra"s at
  % random.  If it's currently an "extra" (<0) or a "noise" with no
  % "extra"s, and if there are k "noise" points not including this
  % point, pick from k+1 alternatives: "noise", or "extra" hanging off
  % of one of the k.

  hasextra = zeros(size(d));		% which points have extras?
  hasextra(-d(d<0)) = 1;

  nd = d;				% the new explanation
  i = 1+floor(rand*n);			% where to propose a change

  if (d(i) > 0)				% if i was a real point
    j = 1+floor(rand*n);		% swap with j
    if (d(j) < 0)			% if j was an extra
      % i is about to become an extra, so it can no longer be a
      % parent: any extras hanging off i follow the real label to j.
      % (The same move applied to j afterwards undoes this one.)
      nd(d == -i) = -j;
      nd(j) = d(i);
      if (d(j) == -i)			% j was one of i's own extras
        nd(i) = -j;
      else				% j was somebody else's extra
        nd(i) = d(j);
      end
    else				% o/w just swap
      nd(i) = d(j);
      nd(j) = d(i);
    end
  elseif ((d(i)==0) && (hasextra(i)))	% if i was noise w/ extras
    theextras = find(d==-i);		% find the extras
    j = 1+floor(rand*length(theextras));
    j = theextras(j);			% pick a random one of the extras
    nd(theextras) = -j;			% reassign all extras to j
    nd(i) = -j;				% and i to j
    nd(j) = 0;				% and j to be noise
  else					% i was extra or noise-w/o-extra
    noisemask = (d==0);
    noisemask(i) = 0;
    thenoise = find(noisemask);		% find noise tracks, except i
    j = floor(rand*(length(thenoise)+1)); % either
    if (j == 0)				% assign i to be noise
      nd(i) = 0;
    else				% or as extra for j
      nd(i) = -thenoise(j);
    end
  end

  % sanity check (debugging aid): no point may be its own parent
%  seq = (1:n)';
%  mask = -seq == nd;
%  if (sum(mask))
%    error(sprintf('%d %d %d %d %d', i, j, d(i), d(j), find(mask)));
%  end

  % MCMC step: accept nd with probability np/p, computed from the
  % difference of log densities so that it stays well-defined even
  % when both densities are too small to represent directly
  nlp = logdensity(nd, xs, ys, zs, cx, cy, cz, wts);
  if (rand < exp(nlp - lp))
    d = nd;
    lp = nlp;
  end

  if (lp > bestlp)
    bestlp = lp;
    bestd = d;
    beststep = step;
  end

end

% This line changes the purpose of the function: rather than return
% a sample, return the most probable configuration found.  (Delete
% it to restore the original behavior.)
d = bestd;
% fprintf('best log p %g, step %d\n', bestlp, beststep);

return



% Observation model: weights for "noise" or "extra" points are uniform
% on [0, 1]; prior of real v. extra v. noise is [.1,.6,.3]; weights
% for "real" points have pdf Z/(1+exp(-15*(x-.2))) for x in [0,1],
% where Z=1.2555 (or so).  This results in having a strong preference
% for selecting weights of .3 instead of .1 as "real", but essentially
% no preference between .5 and .6.
%
% Old observation model: weights for "real" points are 1-w where w is
% exponentially distributed w/ mean 1/5.7; weights for "noise" or
% "extra" points are uniform on [0, 1]; prior of real v. extra
% v. noise is [.1,.6,.3].
%
% logdensity returns the natural log of the (unnormalized) joint
% density of explanation d and the observations.  d(i) > 0 selects
% the "real" term (distance measured to (cx,cy,cz) at index d(i)),
% d(i) == 0 the "noise" term, and d(i) < 0 the "extra" term (distance
% measured to observation -d(i)).

function lp = logdensity(d, xs, ys, zs, cx, cy, cz, wts)

% likelihood of each weight if its point is "real", prior included
lreal = 0.1*1.2555./(1+exp(-15*(wts-.2)));
%lreal = 0.1*5.7*exp(-5.7*(1-wts));

% magic parameters: approximate area of the rectangle that we're
% searching in, variance of the displacement of "extra" points from
% their parents, and variance of the displacement of "real" points
% from their previous locations.
area = 1000;
extrasig2 = 1;
motionsig2 = 25;

% scaled squared distances are capped here, so a far-away point pays
% a bounded penalty
maxdist2 = 25;

% per-point constants, hoisted out of the loop
% NOTE(review): the normalizer is sqrt(2*pi*sigma^2) although the
% distance sums three coordinates; kept exactly as in the original
% model since the other magic parameters were presumably tuned with it.
lognoise = log(.3) - log(area);
logextra0 = log(.6) - 0.5*log(2*pi*extrasig2);
logreal0 = -0.5*log(2*pi*motionsig2);

% Sum the log-likelihood terms selected by d: a constant for noise
% points, a (capped) normal in (x,y,z) for real or extra points.
lp = 0;
for i = 1:length(xs)
  if (d(i) > 0)				% real
    j = d(i);
    dist2 = (xs(i) - cx(j))^2 + (ys(i) - cy(j))^2 + (zs(i) - cz(j))^2;
    dist2 = dist2 / (2*motionsig2);
    if (dist2 > maxdist2) dist2 = maxdist2; end
    lp = lp + log(lreal(i)) + logreal0 - dist2;
  elseif (0 == d(i))			% noise
    lp = lp + lognoise;
  else					% extra
    j = -d(i);
    dist2 = (xs(i) - xs(j))^2 + (ys(i) - ys(j))^2 + (zs(i) - zs(j))^2;
    dist2 = dist2 / (2*extrasig2);
    if (dist2 > maxdist2) dist2 = maxdist2; end
    lp = lp + logextra0 - dist2;
  end
end

return




% old code follows

% initialize to a reasonable guess: the biggest weights are
% explained as "real", others as "noise".
%[swts, topwts] = sort(wts);
%d = zeros(n, 1);
%d(topwts(end-numreal+1:end)) = 1:numreal;


% XXX Observation model: weights for "noise" points are normal with mean
% XXX -.4 and sigma^2=.05; weights for "real" points are normal with mean
% XXX 1 and sigma^2=.05; weights for "extra" points are uniform.


%  eep = 0;
%  if (step > burnin) ep/(step-burnin); end
%  fprintf('%4d: %g %g\n', step, p, eep);
%  for h = 1:length(d)
%    if (d(h) < 0)
%      set(lh(h), 'Color', [1 0 0]);
%    elseif (d(h) > 0)
%      set(lh(h), 'Color', [0 0 1]);
%    else
%      set(lh(h), 'Color', [0 1 0]);
%    end
%  end
%  set(lh(i), 'Marker', 'x');
%  pause
%  set(lh(i), 'Marker', 'o');

