/***************************************************************************
                          Qprocessor.cpp  -  description
                             -------------------
    begin                : Tue Sep 18 2001
    copyright            : (C) 2001 by Yinglian Xie
    email                : ylxie@cs.cmu.edu
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

// qprocessor.cpp: implementation of the CQuery and CQprocessor classes.
//
//////////////////////////////////////////////////////////////////////

#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <algorithm>
#include "qprocessor.h"
#include "findex.h"
#include "utils.h"

//////////////////////////////////////////////////////////////////////
// class CQuery
//////////////////////////////////////////////////////////////////////
/*
 * CQuery: build a query whose options all start at their DEFAULT_* values
 *         (result count, match position, search option, rank option and
 *         description option).
 */
CQuery::CQuery()
	: m_qNumResult(DEFAULT_NUMRESULT),
	  m_qPosition(DEFAULT_POSTION),
	  m_qSearchOption(DEFAULT_SOPTION),
	  m_qRankOption(DEFAULT_ROPTION),
	  m_qDescription(DEFAULT_DESCRIPTION)
{
}

/* ~CQuery: intentionally empty, the destructor performs no work of its own. */
CQuery::~CQuery() {}

//////////////////////////////////////////////////////////////////////
// class CQprocessor
//////////////////////////////////////////////////////////////////////

/*
 * CQprocessor: remember the index that every later lookup is run against.
 *   index - stored as a raw pointer; this constructor neither copies nor
 *           checks it.
 */
CQprocessor::CQprocessor(CFindex *index)
	: m_index(index)
{
}

/* ~CQprocessor: intentionally empty; m_index is not released here. */
CQprocessor::~CQprocessor() {}

////////////////////////////////////
// public functions
//////////////////////////////////

/////////////////////////////////////////////////////////////////
//  functions to setup config options
/*
 * set_delimitchar: install the characters that separate words in a query
 *   delimitchar - caller-supplied delimiter characters
 *
 * Blank, newline, carriage return and tab are always added.  Every '*' is
 * then removed from the set, because '*' is the wildcard character and must
 * stay inside a query word instead of splitting it.
 */
void CQprocessor::set_delimitchar(string delimitchar)
{
	/* the trailing "\0" of the literal is NOT appended: the literal is
	   taken as a C string, which ends at its first NUL */
	m_delimitchar = delimitchar + " \n\r\t\0";

	/* note: * is allowed for wildchar matching, so drop every occurrence
	   (previously only the first one was removed) */
	string::size_type p;
	while ((p = m_delimitchar.find('*')) != string::npos){
		m_delimitchar.erase(p, 1);
	}
}


/////////////////////////////////////////////////////////////////
//  interface functions for request manager

/* 
 * lookup_local: main function for query processing
 *               get a word list, and find matches from index table,
 *               get descriptions from the files
 *
 *   q    - query; this function reads m_qStr (query text), m_qSearchOption
 *          (OR / AND), m_qPosition (TITLE / CONTENT / BOTH) and
 *          m_qDescription (compared against SHOW_ALL)
 *   qr   - result list, updated in place: entries are appended, extended
 *          with more description lines, or erased
 *   user - passed through unchanged to m_index->lookup()
 *
 * Position filter (applied identically in all three places below):
 *   TITLE   - the word must occur in the file name (in_title)
 *   CONTENT - the hit must have first_location != -1
 *   BOTH    - both of the above must hold
 *
 * OR:  every hit that passes the filter either creates a new result or,
 *      when the file is already in qr, may extend that result's description.
 * AND: the first query word seeds qr; each later word removes the results
 *      whose file has no acceptable hit for that word.
 *
 * Any other search option leaves qr untouched.
 */
void  CQprocessor::lookup_local(CQuery& q, TResultList& qr, Identity& user)
{
	TStrList wlist;
	TResultList::iterator riter;
	THitList::iterator hiter;
	int fSize;
	time_t fLmtime;
	// AND mode only: 0 until the first query word has been processed
	int start = 0;
    
	/* get word list from query string */
	get_wordlist(q.m_qStr, wlist);
	
	/* look up matches from index table lookup results */
	TStrList::iterator siter;
	for (siter = wlist.begin(); siter != wlist.end(); siter ++){
		
		// hits for the current word only; rebuilt on every iteration
		THitList hlist;
		m_index->lookup(*siter, user, hlist);
		
		// look at search algorithm selected
		switch (q.m_qSearchOption){
		case OR: // OR
			// a word without hits contributes nothing in OR mode
			if (hlist.empty())
				break;

			for (hiter = hlist.begin(); hiter != hlist.end(); hiter ++){
				/* check if file info is still correct */
				// hits whose file can no longer be stat()ed are skipped
				Ifile& f = m_index->m_files[hiter->fNo];
				if (get_file_info(f.fname, fSize, fLmtime) == -1)
					continue;

				// position filter, see function header
				int intitle = in_title(f.fname, *siter);
				if (((q.m_qPosition == TITLE) && (!intitle)) ||
				    ((q.m_qPosition == CONTENT) && (hiter->first_location == -1))|| 
				    ((q.m_qPosition == BOTH) && 
				     ((!intitle) || (hiter->first_location == -1))))
					continue;
				
				// is this file already in the result list (from an earlier word)?
				riter = find_if(qr.begin(), qr.end(), 
						Qresult_eq(hiter->fNo));

				// concatenate results
				if (riter == qr.end()){
					// first hit for this file: create a new result entry
					Qresult r;
					r.fNo = hiter->fNo;
					r.fName = f.fname;
					r.fSize = fSize;
					r.fLmtime = fLmtime;

					// descriptions are only read from the file for content matches
					if ((q.m_qPosition == CONTENT) || (q.m_qPosition == BOTH)){
						get_description(r.fDescription, q.m_qDescription, f.fname, 
								hiter->first_location, *siter);
					}else{
						r.fDescription.erase();
					}
					qr.push_back(r);
				}else{
					// file already listed: with SHOW_ALL, append the lines for this word too
					if ((q.m_qPosition != TITLE) && (q.m_qDescription == SHOW_ALL)){
						get_description(riter->fDescription, SHOW_ALL, riter->fName,
								hiter->first_location, *siter);
					}
				}
			} // end for
			
			break;
			
		case AND: // AND
			if (!start){
				// first query word: seed qr with every hit that passes the filter.
				// NOTE(review): qr is not cleared and not checked for duplicates
				// here - presumably callers pass an empty list; confirm.
				for (hiter = hlist.begin(); hiter != hlist.end(); hiter ++){
					/* check if file info is still correct */
					Ifile& f = m_index->m_files[hiter->fNo];
					if (get_file_info(f.fname, fSize, fLmtime) == -1)
						continue;
					
					// position filter, see function header
					int intitle = in_title(f.fname, *siter);
					if (((q.m_qPosition == TITLE) && (!intitle)) ||
					    ((q.m_qPosition == CONTENT) && (hiter->first_location == -1))|| 
					    ((q.m_qPosition == BOTH) && 
					     ((!intitle) || (hiter->first_location == -1))))
						continue;
					
					Qresult r;
					r.fNo = hiter->fNo;
					r.fName = f.fname;
					r.fSize = fSize;
					r.fLmtime = fLmtime;
					
					if ((q.m_qPosition == CONTENT) || (q.m_qPosition == BOTH)){
						get_description(r.fDescription, q.m_qDescription, f.fname, 
								hiter->first_location, *siter);
					}else{
						r.fDescription.erase();
					}
					qr.push_back(r);
				}		    
				// set even when the first word produced no results, so that
				// later words can only narrow (never re-seed) the list
				start = 1;
			}else{
				// a later word without any hit makes the whole AND query empty
				if (hlist.empty()){
					qr.clear();
					break;
				}

				// traverse the result list to see if we need to remove some
				// riter is advanced before oi may be erased.
				// NOTE(review): this relies on erase() leaving other iterators
				// valid (true for std::list) - confirm TResultList's type.
				riter = qr.begin();
				while (riter != qr.end()){
					TResultList::iterator oi = riter;
					riter ++;
					// drop the result if its file has no hit for the current word
					hiter = find_if(hlist.begin(), hlist.end(), 
							Ihit_eq(oi->fNo));
					if (hiter == hlist.end()){
						qr.erase(oi);
						continue;
					}
					
					// remove those unwanted results from qr
					string fname = m_index->m_files[hiter->fNo].fname;
					int intitle = in_title(fname, *siter);
					if (((q.m_qPosition == TITLE) && (!intitle)) ||
					    ((q.m_qPosition == CONTENT) && (hiter->first_location == -1))|| 
					    ((q.m_qPosition == BOTH) && 
					     ((!intitle) || (hiter->first_location == -1)))){
						qr.erase(oi);
					}else{
						// result survives: with SHOW_ALL, append the lines for this word
						if ((q.m_qPosition != TITLE) && (q.m_qDescription == SHOW_ALL)){
							get_description(oi->fDescription, SHOW_ALL, oi->fName,
									hiter->first_location, *siter);
						}
					}
				}
			} // end else
			
			break;
			
		default:	
			// unknown search option: ignore the word
			break;	
			
		}  // end switch
		
	} // end for (each query word)
}

////////////////////////////////////
// private functions
//////////////////////////////////

/////////////////////////////////////////////////////////////////
//  help functions for query lookup

/*
 * get_wordlist: extract a list of words from a query string
 */
void CQprocessor::get_wordlist(string& qstr, TStrList& wlist)
{
	/* find the actual word list based on the query */
	string word;
	string::size_type pos_s, pos_e;
    
	pos_e = qstr.find_first_not_of(m_delimitchar);
	while (pos_e < qstr.length()){
		// get the word
		word.erase();
		pos_s = pos_e;
		pos_e = qstr.find_first_of(m_delimitchar, pos_s);
		word = qstr.substr(pos_s, pos_e-pos_s);
		pos_e = qstr.find_first_not_of(m_delimitchar, pos_e);

		// check if the word contains non-wordchar
		// turn the word into lower case
		uint i;
		for (i = 0; i < word.length(); i ++){
			if (!m_index->iswordchar(word[i]))
				break;
			else
				word[i] = tolower(word[i]);
		}
		if (i < word.length())
			continue;

		// check if we need to do wildcard matching 
		string::size_type pos = word.find_first_of("*");
		if (pos > word.length()){
			// not a wildcard matchingq
			wlist.push_back(word);
			continue;
		}

		// start wildcard matching
		TStrList substrList;
		string::size_type pos1, pos2 = 0;
			
		// find the all the substrings we need to compare
		while (pos2 < word.length()){
			pos1 = word.find_first_not_of("*", pos2);
			if (pos1 > word.length())
				break;
			pos2 = word.find_first_of("*", pos1);
			string partw = word.substr(pos1, pos2 - pos1);
			substrList.push_back(partw);
			partw.erase();
		}
			
		// find all the words that matches the substring list
		for (int i = 0; i < HASH_TABLE_SIZE; i ++){
			if (m_index->m_words[i].size() <= 0)
				continue;

			TWordList::iterator witer;
			for (witer = m_index->m_words[i].begin(); 
			     witer != m_index->m_words[i].end(); witer ++){

				TStrList::iterator siter = substrList.begin();
				int found = 1;
				unsigned int j = 1;
				string::size_type p = 0, p1;
					
				while ((siter != substrList.end()) && (found)){
					p1 = witer->word.find(*siter, p);
					if ((j == 1) && (pos > 0)){
						// first substr must match the word begining
						if (p1 == 0)
							p += siter->length();
						else
							found = 0;
					}else{
						// last substr must match the word ending
						if ((j == substrList.size()) && (pos2 > word.length())){
							if (p1 + siter->length() == witer->word.length())
								p += siter->length();
								else
									found = 0;
						}else{
							if (p1 < witer->word.length())
								p += siter->length();
							else
								found = 0;
						}
					}
					siter ++;
					j ++;
				}
				if (found){
					wlist.push_back(witer->word);
				}
			} // for (witer)
		} // for (int i)
		
		word.erase();
	} // end while
}

/*
 * get_description: get descriptions from files that match a word
 *   description - output; every selected line is appended as "#: " + line
 *   allresults  - 0: only the line at `location`;
 *                 non-zero: additionally every later line that contains
 *                 `word` as a whole word
 *   fname       - file to read
 *   location    - byte offset of the first hit in the file; negative
 *                 values are treated as 0
 *   word        - lower-case word to look for in the later lines
 *
 * If the file cannot be opened, or no line can be read at the requested
 * position, description is left unchanged.
 */
void CQprocessor::get_description(string& description, int allresults, 
				  string& fname, long location, string& word)
{
	/* open the file */
	FILE *fp = fopen(fname.c_str(), "r");
	if (!fp){
		mingle_debug1("\nCQprocessor::get_description(): cannot open file ");
		mingle_debug1(fname.c_str());
		return;
	}

	/* defensive: there is no line "before the start of the file" */
	if (location < 0)
		location = 0;

	/* go to the first location for the first line: start at most
	   MAXSTRSIZE bytes before it and read lines until one ends at or
	   behind location */
	long offset = (location > MAXSTRSIZE) ? (location - MAXSTRSIZE) : 0;
	char buf[MAXSTRSIZE];
	bool have_line = false;
	buf[0] = '\0';
	fseek(fp, offset, SEEK_SET);
	do{
		/* stop at EOF / read error instead of re-counting a stale buffer */
		if (!fgets(buf, MAXSTRSIZE-1, fp))
			break;
		have_line = true;
		const size_t got = strlen(buf);
		if (got == 0)
			break;	/* line starts with a NUL byte: offset cannot advance */
		offset += got;
	}while (offset < location);

	/* nothing readable: do not append an empty or garbage line */
	if (!have_line){
		fclose(fp);
		return;
	}
	description += "#: ";
	description += buf;

	/* if only the first result is needed, stop here */
	if (!allresults){
		fclose(fp);
		return;
	}

	/* if need to retrieve all results, go get them */
	char bword[MAXSTRSIZE] = "";
	while (fgets(buf, MAXSTRSIZE-1, fp)){
		char *src = buf;
		bool matched = false;
		do{
			src = get_next_word(bword, MAXSTRSIZE, src);
			/* remember the match itself: the matching word may be the
			   last thing on the line, with src already at the '\0' */
			matched = (bword[0] != '\0') &&
				  (strcmp(word.c_str(), bword) == 0);
		}while ((!matched) && (*src != '\0'));
		if (matched){
			description += "#: ";
			description += buf;
		}
	}

	fclose(fp);
}

/*
 * get_file_info: stat() a file and report its size and last modify time
 *   fname   - path handed to stat()
 *   fSize   - receives st_size on success
 *   fLmtime - receives st_mtime on success
 * returns 0 on success, -1 if stat() fails (outputs are then left untouched)
 */
int CQprocessor::get_file_info(string& fname, int& fSize, time_t& fLmtime)
{
	struct stat info;
	const bool ok = (stat(fname.c_str(), &info) == 0);

	if (ok){
		fSize = info.st_size;
		fLmtime = info.st_mtime;
	}
	return ok ? 0 : -1;
}

/*
 * get_next_word: get a word in a buffer from position pointed by src
 *   buf  - receives the lower-cased word, always NUL-terminated on return
 *   size - capacity of buf in bytes, including the terminator
 *   src  - NUL-terminated text to scan
 * returns a pointer into src at the character where scanning stopped
 * (a delimiter, the terminating NUL, or the middle of a word that did not
 * fit into buf).
 *
 * buf is the empty string when src holds no further token, and also when
 * the token contains a character that is neither a word character nor a
 * delimiter; such a token is skipped as a whole.
 */
char* CQprocessor::get_next_word(char *buf, int size, char* src)
{
	int i = 0, j = 0;

	/* find first valid char */
	while ((src[i] != '\0') && (m_index->isdelimitchar(src[i])))
		i ++;
	if (src[i] == '\0'){
		buf[0] = '\0';
		return (src+i);
	}

	/* scan the word */
	while ((j < size - 1) && (src[i] != '\0') && 
	       (m_index->iswordchar(src[i]))){
		/* unsigned char: tolower() is undefined for negative values */
		buf[j ++] = tolower(static_cast<unsigned char>(src[i++]));
	}

	/* check if we meet the delimit character */
	if ((j == size - 1) || (src[i] == '\0') || 
	    (m_index->isdelimitchar(src[i]))){
		buf[j] = '\0';
		return (src+i);
	}

	/* if not, the token is not a plain word: report it as empty (buf used
	   to be left without a terminator here) and seek until we meet a
	   delimiter */
	buf[0] = '\0';
	while ((src[++i] != '\0') && (!m_index->isdelimitchar(src[i])));
	return (src+i);
}

/*
 * in_title: check if a word has appeared in fname
 *           need to convert fname to lower cases before matching
 *   fname - file name as stored in the index; only a private copy is
 *           lower-cased
 *   word  - text to search for; used as given, so it must already be
 *           lower case
 * returns true if word occurs anywhere in the lower-cased fname
 */
bool CQprocessor::in_title(string& fname, string& word)
{
	string lowfname = fname;

	for (string::size_type i = 0; i < lowfname.length(); i ++){
		/* unsigned char: tolower() is undefined for negative values,
		   which plain char can take for non-ASCII file names */
		lowfname[i] = tolower(static_cast<unsigned char>(lowfname[i]));
	}

	return (lowfname.find(word, 0) != string::npos);
}
