%
% Programmed by Chanwoo Kim for INTERSPEECH 2009
%
% (chanwook@cs.cmu.edu)
%
% Important: The input should be sampled at 16 kHz
%
% * In the source code, if you want to skip the power bias subtraction, then
% change bMedPowerBiasSub to 0
%
% * If you want to use the logarithmic nonlinearity instead of the power
% nonlinearity, change bPowerLaw to 0
%
% (Note: bMedPowerBiasSub and bPowerLaw are not defined in SPBResyn below;
% these two remarks appear to be carried over from the PNCC script.)
%
% SPBResyn(OutFile, InFile, dAlpha)
%
% - InFile is a mono 16 kHz recording
%

function [aadDCT] = SPBResyn(szOutFeatFileName, szInFileName, dAlpha)
    % SPBResyn  Cepstral features computed from a gain-modified, resynthesized signal.
    %
    %   aadDCT = SPBResyn(szOutFeatFileName, szInFileName, dAlpha)
    %
    %   szOutFeatFileName - output feature file. An int32 element count is
    %                       written first, followed by the features as float32
    %                       in column-major order.
    %   szInFileName      - input audio file. The first 1024 bytes are skipped
    %                       and the rest is read as int16 samples
    %                       (presumably a NIST SPHERE header - TODO confirm).
    %   dAlpha            - boosting coefficient: the per-channel gain is
    %                       sqrt(P^2 + (dAlpha * dMax)^2) / P, where dMax is the
    %                       95th-percentile channel power. Optional; defaults
    %                       to 0.02.
    %
    %   aadDCT            - 13 x iNumFrames matrix of mean-normalized DCT
    %                       coefficients of the log channel powers.
    %
    %   Relies on ComputeFilterResponse and NormalizeFilterGain, which are
    %   defined outside this file, and on hamming and dct.

    if nargin < 3 || isempty(dAlpha)
        dAlpha = 0.02;   % value noted in the original source as "obtained from experiment"
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Read the waveform
    %
    fid = fopen(szInFileName, 'rb');
    if fid == -1
        error('SPBResyn:cannotOpenInput', 'Cannot open input file "%s".', szInFileName);
    end
    fseek(fid, 1024, 'bof');          % skip the 1024-byte file header
    ad_x = fread(fid, 'int16');
    fclose(fid);

    bPreem             = 1;   % apply pre-emphasis
    bSPBResyn          = 1;   % run the boosting + resynthesis stage
    bMoveAvgSubMethod1 = 0;   % geometric-mean normalization of the final spectrogram
    bDisplay           = 0;   % plot the reconstructed log spectrum

    dFrameLen    = 0.0256;  % 25.6 ms window length, which is the default setting in CMU Sphinx
    dSampRate    = 16000;
    dFramePeriod = 0.010;   % 10 ms frame period

    iFFTSize = 1024;
    iNumChan = 40;
    iNumCeps = 13;          % number of cepstral coefficients kept after the DCT

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Pre-emphasis using H(z) = 1 - 0.97 z ^ -1
    %
    if bPreem == 1
        ad_x = filter([1 -0.97], 1, ad_x);
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Obtaining the gammatone filter responses.
    %
    % Based on M. Slaney's Auditory Toolbox.
    % In the actual C implementation, we just use a table.
    %
    % aad_H is indexed as (frequency bin, channel) below.
    %
    aad_H = ComputeFilterResponse(iNumChan, iFFTSize);
    aad_H = abs(NormalizeFilterGain(aad_H));

    if bSPBResyn == 1

        dFrameLen_SPB    = 0.0512;
        dFramePeriod_SPB = dFrameLen_SPB / 4;
        iNumChan_SPB     = 40;

        iSpeechLen   = length(ad_x);
        iFL_SPB      = floor(dFrameLen_SPB    * dSampRate);
        iFP_SPB      = floor(dFramePeriod_SPB * dSampRate);
        iFFTSize_SPB = 2 ^ ceil(log2(iFL_SPB));

        % The resynthesized signal is trimmed by iFL_SPB samples at the start
        % and iFL_SPB + 1 samples at the end; anything shorter cannot be
        % processed (it previously failed with an out-of-range deletion).
        if iSpeechLen < 2 * iFL_SPB + 1
            error('SPBResyn:inputTooShort', ...
                  'Input "%s" has %d samples; at least %d are required.', ...
                  szInFileName, iSpeechLen, 2 * iFL_SPB + 1);
        end

        % Number of complete frames. The frame LENGTH must be subtracted here;
        % subtracting the frame period left trailing all-zero columns that
        % biased the geometric-mean normalizer and the dMax percentile.
        iNumFrames_SPB = floor((iSpeechLen - iFL_SPB) / iFP_SPB) + 1;

        % Reuse the filterbank computed above when its dimensions match,
        % otherwise build one for the SPB FFT size / channel count.
        if iFFTSize_SPB == iFFTSize && iNumChan_SPB == iNumChan
            aad_H_SPB = aad_H;
        else
            aad_H_SPB = ComputeFilterResponse(iNumChan_SPB, iFFTSize_SPB);
            aad_H_SPB = abs(NormalizeFilterGain(aad_H_SPB));
        end

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        %
        % Short-time magnitude spectra
        %
        % NOTE(review): only the magnitude is stored, so the ifft in the
        % resynthesis below operates on a zero-phase spectrum. Confirm that
        % discarding the phase is intended.
        %
        adWin_SPB = hamming(iFL_SPB);
        aad_X_SPB = zeros(iFFTSize_SPB, iNumFrames_SPB);
        for iFI = 1 : iNumFrames_SPB
            iOffset           = (iFI - 1) * iFP_SPB;
            ad_x_Frame        = ad_x(iOffset + 1 : iOffset + iFL_SPB) .* adWin_SPB;
            aad_X_SPB(:, iFI) = abs(fft(ad_x_Frame, iFFTSize_SPB));
        end

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        %
        % Divide every bin by its geometric mean over +/- 1.5 s (3 s window)
        %
        SPB_iWin  = floor(1.5 / dFramePeriod_SPB);
        aad_X_SPB = localGeoMeanNormalize(aad_X_SPB, SPB_iWin);

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        %
        % Channel powers and the boosting gain
        %
        aad_P_SPB = localChannelPower(aad_X_SPB(1 : iFFTSize_SPB / 2, :), aad_H_SPB);

        adSorted    = sort(aad_P_SPB(:));
        dMax        = adSorted(round(0.95 * length(adSorted)));   % 95th-percentile power
        aadGain_SPB = sqrt(aad_P_SPB .^ 2 + (dAlpha * dMax) .^ 2) ./ max(aad_P_SPB, eps);

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        %
        % Resynthesis using OLA
        %
        iPowerFactor = 1;

        aad_H_pow = aad_H_SPB .^ iPowerFactor;
        ad_sumH   = sum(aad_H_pow, 2);       % per-bin sum over channels

        ad_y = zeros(1, iSpeechLen);
        for iFI = 1 : iNumFrames_SPB
            iOffset = (iFI - 1) * iFP_SPB;

            % Per-bin gain: filter-weighted average of the channel gains
            ad_mu_g = zeros(iFFTSize_SPB / 2, 1);
            for j = 1 : iNumChan_SPB
                ad_mu_g = ad_mu_g + aadGain_SPB(j, iFI) .^ (iPowerFactor / 2) .* aad_H_pow(:, j);
            end
            ad_mu_g = (ad_mu_g ./ ad_sumH) .^ (1 / iPowerFactor);

            % Extend the half-spectrum gain to the full FFT length.
            % NOTE(review): this mirror pairs bin N with bin 1 (DC); exact
            % conjugate symmetry would pair bin N with bin 2. Left unchanged
            % because only the real part of the ifft is kept below.
            ad_mu_g_sym = [ad_mu_g; flipud(ad_mu_g)];

            adBuffer = real(ifft(aad_X_SPB(:, iFI) .* ad_mu_g_sym)).';
            ad_y(iOffset + 1 : iOffset + iFL_SPB) = ...
                ad_y(iOffset + 1 : iOffset + iFL_SPB) + adBuffer(1 : iFL_SPB);
        end

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        %
        % Drop the first iFL_SPB samples and the last iFL_SPB + 1 samples
        %
        ad_y(1 : iFL_SPB) = [];
        iLen = length(ad_y);
        ad_y(iLen - iFL_SPB : iLen) = [];

        ad_x = ad_y.';
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Obtaining the short-time magnitude spectra and the power P(i, j)
    %
    iFL        = floor(dFrameLen    * dSampRate);
    iFP        = floor(dFramePeriod * dSampRate);
    iNumFrames = max(floor((length(ad_x) - iFL) / iFP) + 1, 0);

    adWin = hamming(iFL);
    aadX  = zeros(iFFTSize / 2, iNumFrames);
    for iFI = 1 : iNumFrames
        iOffset      = (iFI - 1) * iFP;
        adSpec       = fft(ad_x(iOffset + 1 : iOffset + iFL) .* adWin, iFFTSize);
        aadX(:, iFI) = abs(adSpec(1 : iFFTSize / 2));
    end

    if bMoveAvgSubMethod1 == 1
        % Same geometric-mean normalization as above, over +/- 150 frames
        iWin = 150;
        aadX = localGeoMeanNormalize(aadX, iWin);
    end

    % Squared integration over each channel
    aad_P = localChannelPower(aadX, aad_H);

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Apply the nonlinearity
    %
    aadSpec = log(aad_P + eps);

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % DCT (column-wise, i.e. across channels); keep the first iNumCeps rows
    %
    aadDCT                       = dct(aadSpec);
    aadDCT(iNumCeps + 1 : end, :) = [];

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % CMN: remove the per-coefficient mean over all frames
    %
    for i = 1 : iNumCeps
        aadDCT(i, :) = aadDCT(i, :) - mean(aadDCT(i, :));
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Writing the feature in Sphinx format
    %
    [iM, iN] = size(aadDCT);
    iNumData = iM * iN;
    fid = fopen(szOutFeatFileName, 'wb');
    if fid == -1
        error('SPBResyn:cannotOpenOutput', 'Cannot open output file "%s".', szOutFeatFileName);
    end
    fwrite(fid, iNumData, 'int32');
    fwrite(fid, aadDCT(:), 'float32');
    fclose(fid);

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    %
    % Display
    %
    if bDisplay == 1
        figure
        aadSpec = idct(aadDCT, iNumChan);
        imagesc(aadSpec); axis xy;
    end

end

function aadNorm = localGeoMeanNormalize(aadMag, iHalfWin)
    % Divide each element of aadMag (bins x frames) by the geometric mean of
    % its row over frames [j - iHalfWin, j + iHalfWin], clipped to the valid
    % frame range. Magnitudes are floored at eps before taking the log.
    iNumFrames = size(aadMag, 2);
    aadLogMag  = log(max(aadMag, eps));   % computed once instead of per window
    aadNorm    = zeros(size(aadMag));
    for iFrame = 1 : iNumFrames
        iLo = max(iFrame - iHalfWin, 1);
        iHi = min(iFrame + iHalfWin, iNumFrames);
        aadNorm(:, iFrame) = aadMag(:, iFrame) ./ exp(mean(aadLogMag(:, iLo : iHi), 2));
    end
end

function aadPower = localChannelPower(aadMag, aadFilt)
    % Channel power P(c, j) = sum over bins of (aadMag(:, j) .* aadFilt(:, c)) .^ 2,
    % with aadMag as bins x frames and aadFilt as bins x channels.
    iNumFilt = size(aadFilt, 2);
    aadPower = zeros(iNumFilt, size(aadMag, 2));
    for iChan = 1 : iNumFilt
        aadPower(iChan, :) = sum(bsxfun(@times, aadMag, aadFilt(:, iChan)) .^ 2, 1);
    end
end
