////////////////////////////////////////////////////////////////////////////////
// Mercury and Colyseus Software Distribution 
// 
// Copyright (C) 2004-2005 Ashwin Bharambe (ashu@cs.cmu.edu)
//               2004-2005 Jeffrey Pang    (jeffpang@cs.cmu.edu)
//                    2004 Mukesh Agrawal  (mukesh@cs.cmu.edu)
// 
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2, or (at
// your option) any later version.
// 
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
// 
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
// USA
////////////////////////////////////////////////////////////////////////////////
#include <map>
#include <hash_map.h>
#include <vector>
#include <string>
#include <list>
#include <util/debug.h>
#include <stdlib.h>
#include <util/Options.h>
#include <gameapi/common.h>
#include <util/GPL/callback.h>
#include <util/Benchmark.h>
#include <mercury/ObjectLogs.h>

#include "putils.cxx"

// Command-line settings. Each variable is bound to one row of the options[]
// table further down in this file.

// Reference time used to seed the per-log `start` value; when it is negative
// the first timestamp read from a log is used instead.
// NOTE(review): the name START is also invoked as a function-like macro
// (START (FILETIME)) in collect_aliases_and_updates -- presumably supplied by
// the benchmark header; confirm.
double START;
// Slowdown factor: main() multiplies SKIPTIME and LENGTH by it when it
// exceeds 1.0.
float SLOWDOWN;
// Log entries with a timestamp earlier than start + SKIPTIME are ignored.
int SKIPTIME;
// When positive, reading of a log stops at the first entry whose timestamp is
// later than start + SKIPTIME + LENGTH.
int LENGTH;
// Size at which the list of sampled update IDs stops growing; later samples
// overwrite a randomly chosen slot instead.
int MAXSAMPLE;
// Bound to the "showall" option; not read anywhere else in the visible code.
bool SHOWALL;
// Directory that receives aliases.map and discov.log.gz.
char OUTDIR[1024];

// Type of the IDs extracted from the logs (parsed there as hexadecimal text).
typedef unsigned long psid_t;

struct hash_psid_t {
    size_t operator () (const psid_t& a) const {
	return hash<psid_t> () (a);
    }
};

// Equality predicate for psid_t keys.
struct eq_psid_t {
    bool operator () (const psid_t& lhs, const psid_t& rhs) const {
	return !(lhs != rhs);
    }
};

// Hash table from an ID to an int; used in this file both as a flag/set table
// and as a per-ID counter.
typedef hash_map<psid_t, int, hash_psid_t, eq_psid_t> IDintMap;
typedef IDintMap::iterator IDintMapIter;

// List of IDs, its iterator, and a pointer to such a list.
typedef vector<psid_t> VS;
typedef VS::iterator VSIter;
typedef VS *PVS;

// Hash table from an ID to a pointer to a list of IDs (the lists stored in
// AliasMap are allocated with new).
typedef hash_map<psid_t, PVS, hash_psid_t, eq_psid_t> IDPVSMap;
typedef IDPVSMap::iterator IDPVSMapIter;

// Command-line option table handed to ProcessOptions() in main(). Each row
// carries a one-character flag, a long name, a value type and the address of
// the variable bound to the option.
// NOTE(review): the remaining columns look like a description, a default value
// given as text, and a value to store for flag-style options -- the exact
// meaning is defined by OptionType (not visible here); confirm in its header.
OptionType options[] = {
    { 'a', "showall", OPT_BOOL, "", &SHOWALL, "0", (void *) "1" },
    { 'S', "start", OPT_DBL, "", &START, "-1.0", NULL },
    { 'l', "length", OPT_INT, "", &LENGTH, "600", NULL  },
    { 's', "skiptime", OPT_INT, "", &SKIPTIME, "120", NULL  },
    { 'T', "maxsamples", OPT_INT, "", &MAXSAMPLE, "5000", NULL  },
    { 'o', "outdir", OPT_STR, "", OUTDIR, ".", NULL },
    { 'w', "slowdown", OPT_FLT, "", &SLOWDOWN, "0.0", NULL },
    // All-zero row; presumably the end-of-table marker -- confirm.
    { 0, 0, 0, 0, 0, 0, 0 },
};

// Program-wide state shared by the processing stages below.
vector<string> DisLatLogs;         // names of all input logs, filled from argv in main()
IDintMap       TrackingIDs;        // the sampled update IDs we want to track (value is always 1)
IDPVSMap       AliasMap;           // all alias mappings: ID -> list of its aliases
IDintMap       PassPermitted;      // temporary hashtable for pruning the alias map: ID -> pass level
IDintMap       AllowedIDs;         // IDs whose log lines are kept in the filtered output

void collect_aliases_and_updates () {
    int size = 0;
    int aliases = 0, updates = 0;
    VS tracklist;

    cerr << "first pass... collecting all update IDs and aliases [" << 
	DisLatLogs.size () << " logs]" << endl;
    int nf = 0;

    for (vector<string>::iterator it = DisLatLogs.begin (); it != DisLatLogs.end (); ++it) {
	FILE *fp = open_log (*it);
	if (!fp) 
	    Debug::die ("could not open log %s", it->c_str ()); 

	show_progress (nf++, DisLatLogs.size ());

	double start = START, end = 0, tim;
	int type;
	psid_t id;
	psid_t alias;

	START (FILETIME);
	memset (line, 0, sizeof (line));
	while (fgets (line, sizeof (line), fp)) {
	    if (!match_capture (
				"(\\d+\\.\\d+)\\t(\\d+)\\t\\d+\\t([0-9A-F]{8}?)(.*)",
				(char *) line,
				4, /* npats */
				(char **) patbufs)) {
		Debug::warn ("bad log line: %s", line);
		continue;
	    }

	    sscanf (patbufs[0], "%lf", &tim);
	    type = atoi (patbufs[1]);
	    id = (psid_t) strtoll (patbufs[2], NULL, 16);

	    if (start < 0) 
		start = tim;
	    if (tim < start + SKIPTIME) {
		continue;
	    }
	    if (LENGTH > 0 && tim > start + SKIPTIME + LENGTH) { 
		// fprintf (stderr, "%d %.3f %.3f %.3f %d\n", LENGTH, tim, start, START, SKIPTIME);
		break;
	    }
	    if (tim > end)
		end = tim;

	    if (type == DiscoveryLatEntry::ALIAS) {
		alias = (psid_t) strtoll (patbufs[3] + 1, NULL, 16);

		IDPVSMapIter it = AliasMap.find (id);
		if (it == AliasMap.end ()) { 
		    VS *nv = new VS ();
		    nv->push_back (alias);
		    AliasMap.insert (IDPVSMap::value_type (id, nv));
		}
		else {
		    it->second->push_back (alias);
		}

		aliases++;
	    }
	    else if (type == DiscoveryLatEntry::UPDATE_DONE) {
		updates++;
		if (drand48 () > 0.5) {   // sample;
		    if (size == MAXSAMPLE) 
			tracklist[(int) (drand48 () * size)] = id;
		    else {
			tracklist.push_back (id);
			size++;
		    }
		}
	    }
	}
	close_log (fp, *it);
	STOP (FILETIME);
    }

    for (VSIter it = tracklist.begin (); it != tracklist.end (); ++it) 
	TrackingIDs.insert (IDintMap::value_type (*it, 1));
    cerr << "* found " << TrackingIDs.size () << " ids..." << endl;
    cerr << "* found " << aliases << " aliases and " << updates << " updates..." << endl;

}

void prune_alias_map ()
{
    cerr << "pruning alias maps..." << endl;
    PassPermitted.clear ();
    for (IDintMapIter it = TrackingIDs.begin (); it != TrackingIDs.end (); ++it) {
	PassPermitted[it->first] = 0;
	AllowedIDs[it->first] = 1;
    }

    for (int pass = 0; pass < 2; pass++) {
	cerr << "* pass " << pass << endl;

	// go through all the alias mappings;
	// for pass=0, mappings pointing to sampled ids will be allowed
	// for pass=1, mappings pointing to ids allowed in pass=0 will be allowed

	for (IDPVSMapIter oit = AliasMap.begin (); oit != AliasMap.end (); ++oit) {
	    psid_t id = oit->first;
	    PVS pvaliases = oit->second;
	    ASSERT (pvaliases != NULL);

	    for (VSIter vit = pvaliases->begin (); vit != pvaliases->end (); ++vit) {
		psid_t alias = *vit;

		IDintMapIter it2 = PassPermitted.find (alias);
		if (it2 == PassPermitted.end () || it2->second != pass)
		    continue;

		AllowedIDs[id] = 1;
		PassPermitted[id] = pass + 1;
	    }
	}
    }
}

void dump_alias_maps ()
{
    cerr << "dumping pruned alias maps..." << endl;

    FILE *fp = fopen (merc_va ("%s/aliases.map", OUTDIR), "w");
    if (!fp) 
	Debug::die ("could not open `%s/aliases.map' file for writing", OUTDIR);

    fprintf (fp, "Tracked IDs\n");
    for (IDintMapIter it = TrackingIDs.begin (); it != TrackingIDs.end (); ++it) 
	fprintf (fp, "%.8lX\n", (unsigned long) it->first);
    fprintf (fp, "=====================\n");

    for (IDPVSMapIter it = AliasMap.begin (); it != AliasMap.end (); ++it) {
	if (AllowedIDs.find (it->first) == AllowedIDs.end ())
	    continue;

	PVS pva = it->second;
	ASSERT (pva != NULL);
	ASSERT (pva->size () > 0);
	fprintf (fp, "%.8lX", (unsigned long) it->first);
	for (VSIter vit = pva->begin (); vit != pva->end (); ++vit) {
	    if (AllowedIDs.find (*vit) == AllowedIDs.end ())
		continue;
	    fprintf (fp, " %.8lX", (unsigned long) *vit);
	}
	fprintf (fp, "\n");
    }
    fclose (fp);
}

void dump_filtered_logs ()
{
    IDintMap alias_counts, hops_tracker;

    cerr << "dumping filtered logs..." << endl;
    FILE *gp = popen (merc_va ("sort -n | gzip - > %s/discov.log.gz", OUTDIR), "w");
    if (!gp) 
	Debug::die ("could not open `%s/discov.fil' file for writing", OUTDIR);

    int nf = 0;
    for (vector<string>::iterator it = DisLatLogs.begin (); it != DisLatLogs.end (); ++it) {
	FILE *fp = open_log (*it);
	if (!fp) 
	    Debug::die ("could not open log %s", it->c_str ()); 

	show_progress (nf++, DisLatLogs.size ());
	double start = START, end = 0, tim;
	int type;
	int hops;
	psid_t id;
	psid_t alias;

	memset (line, 0, sizeof (line));
	while (fgets (line, sizeof (line), fp)) {
	    if (!match_capture (
				"(\\d+\\.\\d+)\\t(\\d+)\\t(\\d+)\\t([0-9A-F]{8}?)(.*)",
				(char *) line,
				5, /* npats */
				(char **) patbufs)) {
		Debug::warn ("bad log line: %s", line);
		continue;
	    }

	    sscanf (patbufs[0], "%lf", &tim);
	    type = atoi (patbufs[1]);
	    hops = atoi (patbufs[2]);
	    id = (psid_t) strtoll (patbufs[3], NULL, 16);

	    if (start < 0) 
		start = tim;
	    if (tim < start + SKIPTIME)
		continue;
	    if (LENGTH > 0 && tim > start + SKIPTIME + LENGTH) 
		break;
	    if (tim > end)
		end = tim;

	    // note collisions here. we can't get rid of them here,
	    // but the next script can discard these IDs from the 
	    // read logs.
	    if (type == DiscoveryLatEntry::PUB_SEND || 
		type == DiscoveryLatEntry::SUB_SEND || 
		type == DiscoveryLatEntry::PUB_STORE || 
		type == DiscoveryLatEntry::MATCH_SEND) 
		{
		    bool next = false;
		    if (type == DiscoveryLatEntry::PUB_STORE) {			
			IDintMapIter it = hops_tracker.find (id);
			if (it != hops_tracker.end ()) {
			    if (it->second != hops)
				next = true;
			}

			hops_tracker[id] = hops;
		    }

		    // cerr << "considering id " << merc_va ("%.8lX", id) << " with next=" << next << endl;
		    if (!next) {
			IDintMapIter it = alias_counts.find (id);
			if (it != alias_counts.end ())
			    it->second++;
			else
			    alias_counts[id] = 1;
		    }
		}

	    if (AllowedIDs.find (id) == AllowedIDs.end ())
		continue;
	    fprintf (gp, "%s", line);
	}

	close_log (fp, *it);
    }
    pclose (gp);

    int dups = 0;
    for (IDintMapIter it = alias_counts.begin (); it != alias_counts.end (); ++it) {
	if (it->second > 1) {
	    dups++;

	    AllowedIDs.erase (it->first);
	}
    }
    cerr << "* found " << dups << " duplicates " << endl;
}

// Entry point. Parses the options, takes argv[1] .. argv[nargc - 1] (nargc
// being the value returned by ProcessOptions) as log names, allocates the
// capture buffers and runs the four processing stages, each wrapped in TIME().
int main (int argc, char *argv[])
{
    DBG_INIT (NULL);
    InitCPUMHz();
    Benchmark::init();

    const int nargc = ProcessOptions (options, argc, argv, false);

    // A slowdown factor above 1 stretches the time window by that factor.
    if (SLOWDOWN > 1.0) {
	SKIPTIME = (int) (SKIPTIME * SLOWDOWN);
	LENGTH = (int) (LENGTH * SLOWDOWN);
    }

    int arg = 1;
    while (arg < nargc) {
	DisLatLogs.push_back (argv[arg]);
	arg++;
    }

    // One 1024-byte buffer per regex capture slot.
    for (int slot = 0; slot < NPATBUFS; slot++) {
	patbufs[slot] = new char[1024];
    }

    TIME (collect_aliases_and_updates ());
    TIME (prune_alias_map ());
    // The filtered logs are dumped before the alias maps: the log stage also
    // erases duplicate ids from AllowedIDs, which the alias dump then reads.
    TIME (dump_filtered_logs ());
    TIME (dump_alias_maps ());

    Benchmark::print ();
    return 0;
}

// vim: set sw=4 sts=4 ts=8 noet: 
// Local Variables:
// Mode: c++
// c-basic-offset: 4
// tab-width: 8
// indent-tabs-mode: t
// End:
