/*
 *  Team 1
 *  CS575: Software Design
 *  Project Phase I
 *  Pipe-and-Filter style
 *  PNFInput.java
 */

import java.io.*;
import java.util.Vector;

/**
 *  <p>Input class for the Pipe and Filter KWIC system.</p>
 *
 *  <p>This class represents a filter that reads in data from specified files.  
 *  It reads the data into a SentenceCollection that is given to it.  It then 
 *  returns that SentenceCollection back, now filled with the data from the files, if any.
 *  This filter does not make any assumptions about the contents of the incoming collection.
 *  It does, however, assume the file format to be as follows:  one sentence per line, with 
 *  the words separated by white space.</p>
 * 
 *  <p><b>PRE:</b> expects to receive the name(s) of the input file(s) and the 
 *  SentenceCollection data object
 *  <br><b>POST:</b> after execution, all the files will have been parsed and 
 *  data inserted into the SentenceCollection data object, the data files will not be changed
 *
 *  @author Diana Tetelman
 */

public class PNFInput 
{
    /** Extra character that separates words, in addition to the usual white space. */
    private char tokenTermChar;

    /** Character that ends a sentence ('\n' means "one sentence per line"). */
    private char lineTermChar;

    /**
     * <p> <b>Test driver</b></p>
     * <p> Reads <code>KWICInput.txt</code> from the working directory and prints every
     * row of the resulting collection, one row per line, each word followed by a space.
     * For an input file holding the two sentences below, the output is a blank line
     * followed by:</p>
     *
     *       <br>The cat is in the hat 
     *       <br>This is an input file 
     *
     * @param      args            command line arguments (not used)
     * @exception  KWICException   thrown if the input file cannot be opened or parsed
     */
    public static void main(String[] args) throws KWICException
    {
        Vector files = new Vector();
        files.add("KWICInput.txt");
        PNFInput input = new PNFInput(' ', '\n');
        SentenceCollection data = new SentenceCollection();
        SentenceCollection outData = input.getData(files, data);

        // print the data collection
        System.out.println();
        outData.startKWICRowIterator();
        while( outData.hasNextKWICRow() )
        {
            Object[] row = outData.getNextKWICRow();
            for( int i = 0; i < row.length; i++ )
            {
                System.out.print(row[i].toString() + " ");
            }
            System.out.println();
        }
    }

    /**
     * <p> Constructor
     *
     * <p> Both characters must lie in the range '\u0000' - '\u00FF'; the underlying
     * StreamTokenizer cannot be configured for characters beyond that range, so such
     * a terminator would silently have no effect.
     *
     * @param      tokenTerm       Character which is used to separate tokens (for example ' ' means space)
     * @param      lineTerm        Character which is used to terminate a line in the file (for example '\n' for newline char)
     */
    public PNFInput(char tokenTerm, char lineTerm)
    {
        tokenTermChar = tokenTerm;
        lineTermChar = lineTerm;
    }

    /**
     * <p> This method will read in data from the specified files and return a SentenceCollection
     * data object with all of the new data in it.  One row is added per sentence; each row is
     * an array of the sentence's words (Strings) in file order.  Every file is closed again
     * before the next one is opened, whether or not parsing succeeded.
     *
     * @param      filenames       Vector of Strings which should contain the name(s) of the files to be parsed
     * @param      dataCollection  SentenceCollection data object
     * @return                     SentenceCollection data object filled with read data (if any found)
     * @exception  KWICException   thrown if a file cannot be found or cannot be read
     */
    public SentenceCollection getData( Vector filenames, SentenceCollection dataCollection )
        throws KWICException
    {
        for( int fIndex = 0; fIndex < filenames.size(); fIndex++ )
        {
            String filename = (String) filenames.get(fIndex);
            Reader reader = openFile(filename);
            try
            {
                parseSentences(createTokenizer(reader), dataCollection);
            }
            catch( IOException e )
            {
                throw new KWICException("Problem parsing file into tokens!");
            }
            finally
            {
                closeQuietly(reader);
            }
        }
        return dataCollection;
    }

    /**
     * Opens the named file for buffered reading.
     *
     * @param      filename        name of the file to open
     * @return                     an open reader; the caller is responsible for closing it
     * @exception  KWICException   thrown if the file does not exist or cannot be opened
     */
    private Reader openFile(String filename) throws KWICException
    {
        try
        {
            return new BufferedReader(new FileReader(filename));
        }
        catch( FileNotFoundException e )
        {
            // The KWICException built below carries only a message, so print the
            // original trace to keep the underlying cause visible.
            e.printStackTrace();
            throw new KWICException(filename + " not found!");
        }
    }

    /**
     * Builds a tokenizer that splits the input purely on white space, the token
     * terminator and the line terminator.
     *
     * <p> The default StreamTokenizer syntax is deliberately discarded: it parses digits
     * as numbers (leaving <code>sval</code> null), treats '/' as a comment start and
     * quotes as string delimiters, none of which fits the "words separated by white
     * space" input format.
     *
     * @param      reader          source of the characters to tokenize
     * @return                     the configured tokenizer
     */
    private StreamTokenizer createTokenizer(Reader reader)
    {
        StreamTokenizer tokenizer = new StreamTokenizer(reader);

        tokenizer.resetSyntax();
        // everything printable is part of a word ...
        tokenizer.wordChars('\u0021', '\u00FF');
        // ... control characters and space separate words ...
        tokenizer.whitespaceChars('\u0000', '\u0020');
        // ... and so does the configured token terminator (a no-op if it is already white space).
        tokenizer.whitespaceChars(tokenTermChar, tokenTermChar);

        if( lineTermChar == '\n' )
        {
            // newline ends a sentence: have the tokenizer report TT_EOL
            tokenizer.eolIsSignificant(true);
        }
        else
        {
            // newline is plain white space; the line terminator is returned as a
            // one-character token of its own.  Applied last so that it wins if the
            // token and line terminators happen to be the same character.
            tokenizer.eolIsSignificant(false);
            tokenizer.ordinaryChar(lineTermChar);
        }
        return tokenizer;
    }

    /**
     * Reads tokens until end of file, adding one row to the collection for every
     * sentence encountered.  An empty sentence (two terminators in a row) yields an
     * empty row.
     *
     * @param      tokenizer       tokenizer configured by createTokenizer
     * @param      dataCollection  collection that receives the rows
     * @exception  IOException     thrown if reading from the underlying file fails
     * @exception  KWICException   propagated from the collection, if it raises one
     */
    private void parseSentences(StreamTokenizer tokenizer, SentenceCollection dataCollection)
        throws IOException, KWICException
    {
        int tokenType = tokenizer.nextToken();
        while( tokenType != StreamTokenizer.TT_EOF )
        {
            Vector words = new Vector();
            while( tokenType != StreamTokenizer.TT_EOF && !isSentenceEnd(tokenType) )
            {
                if( tokenizer.sval != null )
                {
                    words.add(tokenizer.sval);
                }
                else
                {
                    // Defensive: a token without text is a single ordinary character,
                    // whose code is the token type itself.
                    words.add(String.valueOf((char) tokenType));
                }
                tokenType = tokenizer.nextToken();
            }
            dataCollection.addKWICRow(words.toArray());

            // step over the sentence terminator, if that is what stopped the inner loop
            if( tokenType != StreamTokenizer.TT_EOF )
            {
                tokenType = tokenizer.nextToken();
            }
        }
    }

    /**
     * Tells whether a token marks the end of a sentence: either the tokenizer's
     * end-of-line token or the configured line terminator character.
     *
     * @param      tokenType       value returned by StreamTokenizer.nextToken()
     * @return                     true if the token terminates the current sentence
     */
    private boolean isSentenceEnd(int tokenType)
    {
        return tokenType == StreamTokenizer.TT_EOL || tokenType == lineTermChar;
    }

    /**
     * Closes the reader, ignoring any failure: the file was only read from, so a
     * failed close cannot lose data and must not mask an earlier parsing error.
     *
     * @param      reader          reader to close
     */
    private static void closeQuietly(Reader reader)
    {
        try
        {
            reader.close();
        }
        catch( IOException ignored )
        {
            // intentionally ignored, see method comment
        }
    }

}
