%
% Programmed by Chanwoo Kim for the ASRU 2009
%
% (chanwook@cs.cmu.edu; chanwook@gmail.com)
%
%  SPB-D algorithm
%
% Usage 
% 
%  szOutFeatFileName : Output feature or wave file name
%  szInFileName      : Input file name
%  bLSMS             : Option controlling whether LSMS (log spectral
%                      mean subtraction) is applied. If this argument
%                      is omitted, LSMS is applied by default.
%
%                      Set to 1 to apply LSMS, or to 0 to skip it.
%
%
function [aadDCT] = SPB_D(szOutFeatFileName, szInFileName, bLSMS)
    % SPB_D  Compute SPB-D cepstral features for one audio file.
    %
    %   aadDCT = SPB_D(szOutFeatFileName, szInFileName)
    %   aadDCT = SPB_D(szOutFeatFileName, szInFileName, bLSMS)
    %
    %   Inputs
    %     szOutFeatFileName : file the features are written to.
    %     szInFileName      : input audio file. The first 1024 bytes are
    %                         skipped and the rest is read as int16 samples
    %                         (presumably a fixed-size header such as NIST
    %                         SPHERE -- confirm against the corpus in use).
    %     bLSMS             : 1 applies log spectral mean subtraction, any
    %                         other value skips it. Defaults to 1.
    %
    %   Output
    %     aadDCT : 13-by-iNumFrames matrix holding the first 13 DCT
    %              coefficients of each frame, with the per-coefficient mean
    %              over all frames removed.
    %
    %   The output file holds the number of values as one int32 followed by
    %   the values of aadDCT as float32 in column-major (frame-by-frame)
    %   order.
    %
    %   Errors are raised when either file cannot be opened, when the header
    %   cannot be skipped, or when the signal is shorter than one frame.

    if nargin < 3
        bLSMS = 1;
    end

    bPreem             = 1;
    bSPB               = 1;
    bDisplay           = 0;

    dFrameLen    = 0.0256;  % 25.6 ms window length, which is the default setting in CMU Sphinx
    dSampRate    = 16000;
    dFramePeriod = 0.010;   % 10 ms frame period

    iFFTSize = 1024;
    iNumChan = 40;

    iFL = floor(dFrameLen    * dSampRate);   % frame length in samples
    iFP = floor(dFramePeriod * dSampRate);   % frame period in samples

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Reading the input samples
    %
    fid = fopen(szInFileName, 'rb');
    if fid == -1
        error('SPB_D:cannotOpenInput', ...
              'Cannot open input file ''%s''.', szInFileName);
    end

    % If the header cannot be skipped, reading would start at the header
    % bytes and interpret them as samples, so fail instead.
    iSeekStatus = fseek(fid, 1024, 'bof');
    if iSeekStatus ~= 0
        fclose(fid);
        error('SPB_D:cannotSkipHeader', ...
              'Cannot skip the 1024-byte header of ''%s''.', szInFileName);
    end

    ad_x = fread(fid, 'int16');
    fclose(fid);

    % Without at least one full frame there is nothing to analyse; the
    % percentile lookup further down would fail with an index-0 error.
    iSpeechLen = length(ad_x);
    if iSpeechLen < iFL
        error('SPB_D:inputTooShort', ...
              'Input ''%s'' has %d samples; at least %d are required for one frame.', ...
              szInFileName, iSpeechLen, iFL);
    end
    iNumFrames = floor((iSpeechLen - iFL) / iFP) + 1;

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Pre-emphasis using H(z) = 1 - 0.97 z ^ -1
    %
    if (bPreem == 1)
        ad_x = filter([1 -0.97], 1, ad_x);
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Obtaining the gammatone coefficients.
    %
    % Based on M. Slaney's Auditory Toolbox.
    % In actual C-implementation, we just use a table
    %
    % NOTE(review): aad_H is assumed to be (iFFTSize / 2)-by-iNumChan, one
    % column per channel, because it is multiplied element-wise with a
    % spectrum column below -- confirm against ComputeFilterResponse.
    %
    aad_H = ComputeFilterResponse(iNumChan, iFFTSize);
    aad_H = abs(NormalizeFilterGain(aad_H));

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Obtaining the short-time magnitude spectrum X(k, frame)
    %
    adWindow = hamming(iFL);                 % same window for every frame
    aad_X    = zeros(iFFTSize / 2, iNumFrames);

    for i_FI = 1 : iNumFrames
        iStart          = (i_FI - 1) * iFP;
        ad_x_st         = ad_x(iStart + 1 : iStart + iFL) .* adWindow;
        adSpec          = fft(ad_x_st, iFFTSize);
        aad_X(:, i_FI)  = abs(adSpec(1 : iFFTSize / 2));
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Log Spectral Mean Subtraction for each frequency index
    %
    if bLSMS == 1
        % Half-width of the moving average in frames (1.5 s on each side of
        % the current frame, i.e. a window of about 3 s in total).
        LSMS_i_L         = floor(dSampRate * 1.5 / iFP);
        LSMS_aad_LogX    = log(max(aad_X, eps));
        LSMS_aad_X_tilde = zeros(size(aad_X));

        for j = 1 : iNumFrames
            % The window is clipped at the first and last frame.
            iLo = max(j - LSMS_i_L, 1);
            iHi = min(j + LSMS_i_L, iNumFrames);

            for i = 1 : iFFTSize / 2
                % Divide by the geometric mean of the magnitudes in the window.
                LSMS_aad_X_tilde(i, j) = aad_X(i, j) ./ ...
                      exp(mean(LSMS_aad_LogX(i, iLo : iHi)));
            end
        end

        aad_X = LSMS_aad_X_tilde;
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Obtaining the short-time Power P(i, j) by squared integration of the
    % filter-weighted spectrum (i: channel, j: frame)
    %
    aad_P = zeros(iNumChan, iNumFrames);
    for i_FI = 1 : iNumFrames
        for j = 1 : iNumChan
            aad_P(j, i_FI) = sum((aad_X(:, i_FI) .* aad_H(:, j)) .^ 2);
        end
    end

    if bSPB == 1
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        %
        % Obtaining 95 percentile peak power
        %
        adSorted      = sort(aad_P(:));
        SPB_d_P_peak  = adSorted(round(0.95 * length(adSorted)));

        SPB_d_alpha = 0.02;

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        %
        % Obtaining weighting
        %
        SPB_aad_w    = (sqrt((aad_P .^ 2 + (SPB_d_alpha * SPB_d_P_peak) .^ 2)) ./ max(aad_P, eps));
        SPB_aad_logw = log(SPB_aad_w);       % taken once instead of per window

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        %
        % Weighting smoothing: average of the log weights over
        % +/- SPB_i_M channels and +/- SPB_i_N frames, clipped at the edges
        %
        SPB_aad_w_tilde = zeros(size(SPB_aad_w));

        SPB_i_M = 4;
        SPB_i_N = 1;
        for j = 1 : iNumFrames
            aiCols = max(j - SPB_i_N, 1) : min(j + SPB_i_N, iNumFrames);
            for i = 1 : iNumChan
                aiRows = max(i - SPB_i_M, 1) : min(i + SPB_i_M, iNumChan);
                SPB_aad_w_tilde(i, j) = mean(mean(SPB_aad_logw(aiRows, aiCols)));
            end
        end
        aad_P = exp(SPB_aad_w_tilde) .* aad_P;
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Apply the nonlinearity
    %
    aadSpec = log(aad_P + eps);

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % DCT (keep the first 13 coefficients of every frame)
    %
    aadDCT                 = dct(aadSpec);
    aadDCT(14:iNumChan, :) = [];

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % CMN
    %
    for i = 1 : size(aadDCT, 1)
        aadDCT(i, :) = aadDCT(i, :) - mean(aadDCT(i, :));
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Writing the feature in Sphinx format
    %
    [iM, iN] = size(aadDCT);
    iNumData = iM * iN;

    fid = fopen(szOutFeatFileName, 'wb');
    if fid == -1
        error('SPB_D:cannotOpenOutput', ...
              'Cannot open output file ''%s''.', szOutFeatFileName);
    end
    fwrite(fid, iNumData, 'int32');
    iCount = fwrite(fid, aadDCT(:), 'float32');
    fclose(fid);

    % A short write leaves a truncated feature file; report it but still
    % return the features that were computed.
    if iCount ~= iNumData
        warning('SPB_D:incompleteWrite', ...
                'Only %d of %d feature values were written to ''%s''.', ...
                iCount, iNumData, szOutFeatFileName);
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Display
    %
    if bDisplay == 1
        figure
        aadSpec = idct(aadDCT, iNumChan);
        imagesc(aadSpec); axis xy;
    end

end
