% ==+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+====0
% 
% Copyright (c) 2010, Kornel Laskowski
% All rights reserved.
%
% Redistribution and use in source and binary forms, with or without modification, are permitted
% provided that the following conditions are met:
%
%    * Redistributions of source code must retain the above copyright notice, this list of
%      conditions and the following disclaimer.
%    * Redistributions in binary form must reproduce the above copyright notice, this list of
%      conditions and the following disclaimer in the documentation and/or other materials provided
%      with the distribution.
%    * Neither the name of Sigtactica Research or of the Royal Institute of Technology (KTH) or of
%      Carnegie Mellon University nor the names of their contributors may be used to endorse or
%      promote products derived from this software without specific prior written permission.
% 
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
% IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
% FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
% CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
% DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
% DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
% IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
% OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
% ==+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+====0

%
% Identifiers of the meetings to process, one row per identifier prefix.
% Each identifier names a file that is later read as ../lex.Q/<id>.Q.
% The gaps in the numbering are present in the original list.
%

meetIdList = { ...
	'Bdb001', ...
	'Bed002', 'Bed003', 'Bed004', 'Bed005', 'Bed006', 'Bed008', 'Bed009', 'Bed010', ...
	'Bed011', 'Bed012', 'Bed013', 'Bed014', 'Bed015', 'Bed016', 'Bed017', ...
	'Bmr001', 'Bmr002', 'Bmr003', 'Bmr005', 'Bmr006', 'Bmr007', 'Bmr008', 'Bmr009', ...
	'Bmr010', 'Bmr011', 'Bmr012', 'Bmr013', 'Bmr014', 'Bmr015', 'Bmr016', 'Bmr018', ...
	'Bmr019', 'Bmr020', 'Bmr021', 'Bmr022', 'Bmr023', 'Bmr024', 'Bmr025', 'Bmr026', ...
	'Bmr027', 'Bmr028', 'Bmr029', 'Bmr030', 'Bmr031', ...
	'Bns001', 'Bns002', 'Bns003', ...
	'Bro003', 'Bro004', 'Bro005', 'Bro007', 'Bro008', 'Bro010', 'Bro011', 'Bro012', ...
	'Bro013', 'Bro014', 'Bro015', 'Bro016', 'Bro017', 'Bro018', 'Bro019', 'Bro021', ...
	'Bro022', 'Bro023', 'Bro024', 'Bro025', 'Bro026', 'Bro027', 'Bro028', ...
	'Bsr001', ...
	'Btr001', 'Btr002', ...
	'Buw001' ...
};

% Number of meetings in the list.
R = numel(meetIdList);

% ==+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+====0

% Probability mass that is redistributed to transitions with zero probability
% in a model row (used by the smoothing step of the scoring loop).
unkFraction = 1e-6;

% ==+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+====0

% Accumulators for the per-row scores: one over all rows of every tested meeting,
% and one over the selected subset of rows. Both start out empty and are grown by
% vertical concatenation.
[expQNegLogProbAll, expQNegLogProbSub] = deal([]);

% ==+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+====0

%
% Accumulate counts for each meeting individually.
%

for r=1:R
	% Read the numeric matrix stored for meeting r; it has T rows and K columns.
	% Its entries are used below as 0/1 indices into a 2-by-2 table.
	Q = textread(sprintf('../lex.Q/%s.Q',meetIdList{r}));
	[T,K] = size(Q);

	% Pair every entry with the entry one row earlier in the same column.
	% The first row has no predecessor and is paired with an all-zero row.
	% min(T,1) keeps the construction valid for a matrix with no rows.
	QLag = [zeros(min(T,1),K); Q(1:(T-1),:)];

	% a1{r}(i,j) is the number of (previous,current) pairs equal to (i-1,j-1).
	a1{r} = accumarray([QLag(:)+1, Q(:)+1], 1, [2 2]);
end

% ==+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+====0

for r=1:R

	%
	% Pool the transition counts of every meeting except meeting r, which is held out.
	%

	a = zeros(2,2);
	for s=[1:(r-1), (r+1):R]
		a = a + a1{s};
	end

	%
	% Turn each row of counts into a distribution. A row without any
	% observations is first filled with ones, so it becomes uniform.
	%

	sumARows = sum(a,2);
	a(sumARows == 0,:) = 1;
	sumARows = sum(a,2);
	a = a ./ repmat(sumARows,1,2);

	%
	% Within each row, split unkFraction evenly among the zero-probability
	% entries and scale the remaining entries by ( 1 - unkFraction ).
	%

	for i=1:2
		isUnseen = (a(i,:) == 0);
		nZero = sum(isUnseen);
		if nZero > 0
			a(i,~isUnseen) = a(i,~isUnseen) * ( 1 - unkFraction );
			a(i,isUnseen) = unkFraction / nZero;
		end
	end

	%
	% Normalize the rows once more after smoothing.
	%

	sumARows = sum(a,2);
	a = a ./ repmat(sumARows,1,2);

	%
	% Load the held-out meeting and score it against the model.
	%

	Q = textread(sprintf('../lex.Q/%s.Q',meetIdList{r}));
	[T,K] = size(Q);

	% Collapse each row of Q into a single number, weighting column k by 2^(k-1).
	encoding = 2.^((0:(K-1))');
	encodedQ = Q * encoding;

	% Row t of QLag is row t-1 of Q; the first row is preceded by all zeros.
	QLag = [zeros(min(T,1),K); Q(1:(T-1),:)];

	% qProb(t) is the product over columns of a(previous+1,current+1).
	% Columns are folded in one at a time, in increasing k, so each row is
	% multiplied in the same order as a per-element loop would use.
	qProb = ones(T,1);
	for k=1:K
		qProb = qProb .* a(sub2ind(size(a), QLag(:,k)+1, Q(:,k)+1));
	end

	% Negative base-2 log of each row's probability, divided by the column count.
	qNegLogProb = -log2(qProb)/K;

	expQNegLogProbAll = vertcat(expQNegLogProbAll, qNegLogProb);

	% Subset: rows whose encoded value differs from that of the preceding row
	% (the first row is compared against 0).
	subIdx = find(diff([0; encodedQ]) ~= 0);
	expQNegLogProbSub = vertcat(expQNegLogProbSub, qNegLogProb(subIdx));
end

% ==+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+====0

% Report 2 raised to the mean of the accumulated values, once for all rows
% and once for the selected subset of rows.
perplexityAll = 2^(sum(expQNegLogProbAll)/length(expQNegLogProbAll));
perplexitySub = 2^(sum(expQNegLogProbSub)/length(expQNegLogProbSub));

summaryLine = sprintf('ALL: %18.16g SUB: %18.16g',perplexityAll,perplexitySub);
disp(summaryLine);

% ==+====1====+====2====+====3====+====4====+====5====+====6====+====7====+====8====+====9====+====0

