////////////////////////////////////////////////////////////////////////////////
// Mercury and Colyseus Software Distribution 
// 
// Copyright (C) 2004-2005 Ashwin Bharambe (ashu@cs.cmu.edu)
//               2004-2005 Jeffrey Pang    (jeffpang@cs.cmu.edu)
//                    2004 Mukesh Agrawal  (mukesh@cs.cmu.edu)
// 
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2, or (at
// your option) any later version.
// 
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
// 
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
// USA
////////////////////////////////////////////////////////////////////////////////

#include <om/OMLoadManager.h>
#include <om/Manager.h>
#include <om/DirectRouter.h>
#include <om/EventDemux.h>
#include <om/TestLogs.h>
#include <mercury/Message.h>
#include <util/OS.h>
#include <util/Utils.h>

// Out-of-class definition of the static member; no initializer is given here.
// The trailing note records a value of 1.25e6 (presumably supplied in the
// class declaration -- confirm in the header).
const float OMLoadSearcher::MAX_SPARE_CAPACITY; // = 1.25 * 1000 * 1000;

///////////////////////////////////////////////////////////////////////////////

// Deserializing constructor: rebuilds a NodeLoadInfo from a packet laid out
// the way Serialize() writes it (sid, toLowWaterMark, toHighWaterMark, load).
// NOTE(review): the ReadFloat() calls execute in member *declaration* order,
// not in the order written below -- confirm the header declares the members
// in this same order, otherwise fields are decoded from the wrong bytes.
NodeLoadInfo::NodeLoadInfo(Packet *pkt) : 
    TimedStruct(ManagerParams::NODE_LOAD_INFO_TTL),
    sid(pkt), toLowWaterMark(pkt->ReadFloat()), 
    toHighWaterMark(pkt->ReadFloat()), load(pkt->ReadFloat())
{
}
// Writes this record to `pkt`: the node SID first, then the three float
// fields in the order the Packet constructor reads them back.
void NodeLoadInfo::Serialize(Packet *pkt)
{
    // identity of the node this record describes
    sid.Serialize(pkt);

    // distance to each water mark, followed by the load itself
    pkt->WriteFloat(toLowWaterMark);
    pkt->WriteFloat(toHighWaterMark);
    pkt->WriteFloat(load);
}
// Serialized size in bytes: the SID plus three 4-byte floats
// (toLowWaterMark, toHighWaterMark, load).
uint32 NodeLoadInfo::GetLength()
{
    const uint32 floatFieldBytes = 3 * 4;
    return sid.GetLength() + floatFieldBytes;
}

///////////////////////////////////////////////////////////////////////////////

// Event type tag that identifies OMLoadEvent publications. Defined here
// without an initializer (presumably assigned when event types are
// registered elsewhere -- TODO confirm).
EventType PUB_OM_LOAD;

// Builds a load publication for hub `hubid` carrying a copy of `info`.
// The event's single tuple is keyed on the node's spare capacity below its
// low water mark.
OMLoadEvent::OMLoadEvent(uint32 hubid, const NodeLoadInfo& info) :
    m_LoadInfo(info)
{
    // A node that is not below its low water mark has nothing to advertise.
    ASSERT(info.toLowWaterMark > 0);

    // The spare capacity is published negated: mercury searches the smallest
    // data values first and then the higher ones, and we want the search to
    // reach the node with the most spare capacity first.
    Value negatedSpare =
	Manager::GetInstance()->ConvertToValue(hubid,
					       -info.toLowWaterMark,
					       -OMLoadSearcher::MAX_SPARE_CAPACITY,
					       0);

    Tuple key(hubid, negatedSpare);
    AddTuple(key);
}
bool OMLoadEvent::LessThan(const Event *oe) const
{
    if (Event::LessThan(oe) || Event::LessThan(oe))
	return Event::LessThan(oe);
    ASSERT(oe->GetType() == PUB_OM_LOAD);
    return less_SID()(m_LoadInfo.sid,
		      static_cast<const OMLoadEvent *>(oe)->GetLoadInfo().sid);
}
// Hook invoked with a matching interest; load events take no action here.
void OMLoadEvent::OnInterestMatch(Interest *in)
{
}
// Returns true when `old` is a load publication from the same node (same SID)
// as this event; any other event type yields false.
bool OMLoadEvent::OverwriteEvent(Event *old) const
{
    if (old->GetType() == PUB_OM_LOAD) {
	OMLoadEvent *previous = static_cast<OMLoadEvent *>(old);
	return previous->GetLoadInfo().sid == m_LoadInfo.sid;
    }
    return false;
}
// Stream printer: emits exactly what PointEvent prints; the load info
// itself is not included.
ostream& OMLoadEvent::Print (ostream& out) const
{
    ostream& result = PointEvent::Print(out);
    return result;
}

// Deserializing constructor: the PointEvent base consumes its part of the
// packet first, then the NodeLoadInfo payload is read from what follows
// (mirrors the write order in Serialize()).
OMLoadEvent::OMLoadEvent(Packet *pkt) : PointEvent(pkt)
{
    m_LoadInfo = NodeLoadInfo(pkt);
}
// Writes the event to `pkt`: base PointEvent data first, load info second.
void OMLoadEvent::Serialize(Packet *pkt)
{
    // base-class portion
    PointEvent::Serialize(pkt);

    // load payload appended after it
    m_LoadInfo.Serialize(pkt);
}
// Serialized size: PointEvent's length plus the load info's length.
uint32 OMLoadEvent::GetLength()
{
    const uint32 baseLen = PointEvent::GetLength();
    return baseLen + m_LoadInfo.GetLength();
}
// FILE* printer: delegates entirely to PointEvent; nothing load-specific
// is written.
void OMLoadEvent::Print(FILE *f)
{
    PointEvent::Print(f);
}

///////////////////////////////////////////////////////////////////////////////

// Event type tag, presumably for OMLoadInterest (it is not referenced by the
// code visible here -- TODO confirm where it is assigned and used).
EventType INTEREST_OM_LOAD;

// Builds an interest on hub `hubid` that matches load publications from nodes
// with at least `minRequired` spare capacity.
OMLoadInterest::OMLoadInterest(uint32 hubid, CostMetric minRequired)
{
    ASSERT(minRequired < OMLoadSearcher::MAX_SPARE_CAPACITY);

    // Publications carry the *negated* spare capacity (see OMLoadEvent), so
    // the range runs from -MAX_SPARE_CAPACITY up to -minRequired.
    Value rangeLow =
	Manager::GetInstance()->ConvertToValue(hubid,
					       -OMLoadSearcher::MAX_SPARE_CAPACITY,
					       -OMLoadSearcher::MAX_SPARE_CAPACITY,
					       0);
    Value rangeHigh =
	Manager::GetInstance()->ConvertToValue(hubid,
					       -minRequired,
					       -OMLoadSearcher::MAX_SPARE_CAPACITY,
					       0);

    Constraint range(hubid, rangeLow, rangeHigh);
    AddConstraint(range);
}

// Deserializing constructor: everything is read by the Interest base class;
// OMLoadInterest adds no fields of its own to the wire format.
OMLoadInterest::OMLoadInterest(Packet *pkt) : Interest(pkt)
{
}
// Serializes exactly what the Interest base class serializes.
void OMLoadInterest::Serialize(Packet *pkt)
{
    // no subclass-specific fields to append
    Interest::Serialize(pkt);
}
// Serialized size is that of the Interest base class; nothing is added.
uint32 OMLoadInterest::GetLength()
{
    const uint32 baseLen = Interest::GetLength();
    return baseLen;
}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

// Human-readable name of this sampler.
const char *OMLoadSampler::GetName () const
{
    const char *name = "OMLoadSampler";
    return name;
}
// Local radius reported by this sampler; fixed at 1.
int OMLoadSampler::GetLocalRadius () const
{
    const int radius = 1;
    return radius;
}

// Sample lifetime: 60 seconds, expressed in milliseconds.
u_long OMLoadSampler::GetSampleLifeTime () const
{
    const int seconds = 60;
    return seconds * 1000;
}
// Number of samples to report; 10 is a placeholder value (XXX in the
// original code).
int OMLoadSampler::GetNumReportSamples () const
{
    const int numSamples = 10; /* XXX */
    return numSamples;
}

// Interval between random walks; hard-coded to 500.
// XXX: ASHWIN: please set this to what you think is reasonable, or set it
// to Parameters::RandomWalkInterval.
int OMLoadSampler::GetRandomWalkInterval () const
{
    const int interval = 500;
    return interval;
}

// Returns a newly allocated metric holding this node's load normalized by its
// capacity (load / capacity).
Metric* OMLoadSampler::GetPointEstimate ()
{
    CostMetric currentLoad = m_Manager->GetLoad();
    CostMetric maxLoad     = m_Manager->GetCapacity();

    CostMetric normalized = currentLoad/maxLoad;
    Metric *estimate = new OMLoadMetric(normalized);
    return estimate;
}
// Combines `samples` into one newly allocated metric holding their
// arithmetic mean. Every element is treated as an OMLoadMetric.
// NOTE(review): an empty `samples` vector divides 0 by 0 here -- confirm
// callers never pass one.
Metric* OMLoadSampler::MakeLocalEstimate (vector<Metric* >& samples)
{
    CostMetric sum = 0;
    for (vector<Metric* >::iterator it = samples.begin();
	 it != samples.end(); ++it) {
	// XXX construct more accurately?
	OMLoadMetric *metric = static_cast<OMLoadMetric *>(*it);
	sum += metric->GetLoad();
    }

    return new OMLoadMetric(sum/samples.size());
}

///////////////////////////////////////////////////////////////////////////////

// Stores the hub id, mercury node, event demux and sampler, then registers
// the sampler with the node for this hub. Registration must succeed.
MercuryLoadAggregator::MercuryLoadAggregator(uint32 hubid,
					     MercuryNode *n,
					     EventDemux *d,
					     OMLoadSampler *s) :
    m_HubID(hubid), m_Node(n), m_Demux(d), m_Sampler(s)
{
    int status = m_Node->RegisterSampler(m_HubID, s);
    ASSERT(status >= 0);
}
// Estimates the mean and standard deviation of normalized load across the
// system from the samples mercury has collected for our hub.
//
// Returns false, leaving *mean and *stddev untouched, when the samples cannot
// be fetched or there are not more than LOAD_MIN_AGGREGATION_SAMPLES of them.
// Otherwise writes the population mean and standard deviation and returns
// true.
bool MercuryLoadAggregator::EstimateSystemLoad(float *mean, float *stddev)
{
    vector<Sample *> samples;
    int ret = m_Node->GetSamples(m_HubID, m_Sampler, &samples);

    //INFO << "ret=" << ret << " samples=" << samples.size() << endl;

    if (ret < 0)
	return false;

    if (!(samples.size() > LOAD_MIN_AGGREGATION_SAMPLES))
	return false;

    float sum   = 0;
    float sumSq = 0;

    for (uint32 i=0; i<samples.size(); i++) {
	const OMLoadMetric *m =
	    static_cast<const OMLoadMetric *>(samples[i]->GetMetric());
	const float load = m->GetLoad();

	sum   += load;
	sumSq += load*load;
    }

    // Var[X] = E[X^2] - E[X]^2: the sum of squares must be divided by n
    // *before* the squared mean is subtracted.
    const float n   = static_cast<float>(samples.size());
    const float avg = sum/n;
    const float var = sumSq/n - avg*avg;

    *mean   = avg;
    // rounding can push a tiny variance slightly below zero
    *stddev = var > 0 ? sqrt( var ) : 0;

    return true;
}
// Publishes a load event by handing it to the mercury node.
void MercuryLoadAggregator::SendEvent(OMLoadEvent *ev)
{
    // mercury performs the routing
    m_Node->SendEvent(ev);
}
// Registers a load interest with the mercury node.
void MercuryLoadAggregator::RegisterInterest(OMLoadInterest *in)
{
    // mercury performs the matching
    m_Node->RegisterInterest(in);
}
// Fetches the next PUB_OM_LOAD event from the event demux and returns
// whatever the demux yields.
OMLoadEvent *MercuryLoadAggregator::ReadEvent()
{
    OMLoadEvent *next = m_Demux->ReadEvent<OMLoadEvent>(PUB_OM_LOAD);
    return next;
}
// Intentionally empty: this aggregator has no periodic work of its own.
void MercuryLoadAggregator::DoWork()
{
    // do nothing: Manager will call MercuryNode::DoWork()
}

///////////////////////////////////////////////////////////////////////////////

// Sends `m` over UDP to every peer in m_Others except ourselves.
void BroadcastLoadAggregator::Broadcast(Message *m)
{
    for (uint32 peer=0; peer<m_Others.size(); peer++) {
	// our own address is in the list too; skip it
	if (!(m_Others[peer] != m_Network->GetAppID()))
	    continue;

	m_Network->SendMessage(m, &m_Others[peer], PROTO_UDP);
    }
}
// Sends `msg` to a single peer over UDP. The message must have a non-zero
// type.
void BroadcastLoadAggregator::Send(SID target, Message *msg)
{
    ASSERT(msg->GetType() != 0);

    //INFO << "sending msg size=" << msg->GetLength() << " " << msg << endl;

    m_Network->SendMessage(msg, &target, PROTO_UDP);
}
// Returns the peer after us in m_Others, wrapping from the last entry back
// to the first. If our own address is not found, the scan index ends at
// m_Others.size() and the same "+1 modulo size" rule is applied to it.
SID BroadcastLoadAggregator::NextInRing()
{
    uint32 pos = 0;
    while (pos < m_Others.size()) {
	if (m_Others[pos] == m_Network->GetAppID())
	    break;
	pos++;
    }

    const uint32 successor = (pos + 1) % m_Others.size();
    return m_Others[successor];
}
// Sets up the broadcast-based aggregator:
//  - derives our own SID from g_Preferences.hostname and
//    ManagerParams::BROADCAST_LOAD_INFO_PORT,
//  - creates a private scheduler and RealNet endpoint on that SID,
//  - parses the ",;"-separated peer list in
//    ManagerParams::BROADCAST_LOAD_INFO_OTHERS into m_Others and m_Map
//    (each load starts at -1, i.e. "unknown"),
//  - sorts the peer list and starts listening on UDP.
// The peer list must be non-empty and must contain our own SID.
BroadcastLoadAggregator::BroadcastLoadAggregator(OMLoadManager *m)
{
    SID id;
    // resolve our hostname; if resolution fails, treat it as a dotted-quad
    struct hostent *entry = gethostbyname(g_Preferences.hostname);
    if (entry)
	id.m_IP = ((struct in_addr *) entry->h_addr)->s_addr;
    else
	id.m_IP = inet_addr(g_Preferences.hostname);
    id.m_Port = ManagerParams::BROADCAST_LOAD_INFO_PORT;

    m_Manager   = m;
    m_Scheduler = new SchedulerWithoutEventQueue ();
    m_Network   = new RealNet(m_Scheduler, id, false);

    bool foundMyself = false;
    // NOTE(review): strtok() writes NUL terminators into the string it
    // tokenizes, so BROADCAST_LOAD_INFO_OTHERS is modified in place here --
    // confirm nothing else reads it afterwards and that only one instance
    // is ever constructed.
    char *str = strtok(ManagerParams::BROADCAST_LOAD_INFO_OTHERS, ",;");
    while (str != NULL) {
	char *addr = str;
	SID other = SID(addr);
	ASSERT(other != SID_NONE);
	if (other == id)
	    foundMyself = true;
	m_Others.push_back( other );
	// -1 marks "no load estimate received yet" (see EstimateSystemLoad)
	m_Map.insert( pair<SID,float>(other, -1) );
	str = strtok(NULL, ",;");
    } 
    ASSERT(m_Others.size() > 0);
    ASSERT(foundMyself);

    // sort them so everyone has this list in the same order
    stable_sort(m_Others.begin(), m_Others.end(), less_SID());

    m_Network->StartListening(PROTO_UDP);
}
// Releases everything this aggregator owns: events that are still queued
// (both queues hold Clone()d copies created by this class), then the network
// endpoint and the scheduler it was built on.
BroadcastLoadAggregator::~BroadcastLoadAggregator()
{
    // local publications that were never matched or garbage collected
    for (list<OMLoadEvent *>::iterator it = m_LocalEvents.begin();
	 it != m_LocalEvents.end(); ++it) {
	delete *it;
    }
    m_LocalEvents.clear();

    // search results that the owner never read
    while (!m_RemoteEvents.empty()) {
	delete m_RemoteEvents.front();
	m_RemoteEvents.pop_front();
    }

    // the network was constructed on top of the scheduler, so it goes first
    delete m_Network;
    delete m_Scheduler;
}
// Estimates the mean and standard deviation of normalized load over this
// node plus every peer whose load estimate has been received.
//
// Peers whose recorded load is still negative (the initial -1, meaning no
// estimate has arrived) are skipped. Always returns true, because the local
// load provides at least one sample.
bool BroadcastLoadAggregator::EstimateSystemLoad(float *mean, float *stddev)
{
    // our own load, normalized by capacity, is the first sample
    const float local = m_Manager->GetLoad()/m_Manager->GetCapacity();

    uint32 count = 1;
    float sum    = local;
    float sumSq  = local*local;

    for (uint32 i=0; i<m_Others.size(); i++) {
	const float load = m_Map[m_Others[i]];
	if (load < 0)
	    continue;
	count++;
	sum   += load;
	sumSq += load*load;
    }

    // Var[X] = E[X^2] - E[X]^2: the sum of squares must be divided by the
    // sample count *before* the squared mean is subtracted.
    const float avg = sum/count;
    const float var = sumSq/count - avg*avg;

    *mean   = avg;
    // rounding can push a tiny variance slightly below zero
    *stddev = var > 0 ? sqrt( var ) : 0;
    return true;
}
// "Publishes" a load event by keeping a private copy in m_LocalEvents.
// Nothing is sent on the network here; stored events are handed out when a
// matching subscription arrives (HandleSubscription) or dropped once they
// expire (GarbageCollect). Events with a zero lifetime are rejected.
void BroadcastLoadAggregator::SendEvent(OMLoadEvent *ev)
{
    DB(-5) << "publishing light-load: " << ev << endl;

    if (ev->GetLifeTime() == 0) {
	WARN << "Got event with 0-TTL: " << ev << endl;
	return;
    }

    TimeVal now;
    OS::GetCurrentTime(&now);

    // store a copy with an absolute expiry time; the caller keeps `ev`
    OMLoadEvent *copy = ev->Clone();
    copy->SetDeathTime(now + copy->GetLifeTime());
    m_LocalEvents.push_back(copy);
}
// Starts a search: wraps `in` in a subscription message created by us and
// sends it to our successor on the ring. If we are our own successor, only a
// warning is logged.
void BroadcastLoadAggregator::RegisterInterest(OMLoadInterest *in)
{
    DB(-5) << "registering for light-load: " << in << endl;

    SID me = m_Network->GetAppID();
    MsgSubscription msg(m_Manager->GetHubID(), me, in, me);

    SID successor = NextInRing();
    if (successor == m_Network->GetAppID()) {
	WARN << "I am my own successor in bcast ring!" << endl;
	return;
    }

    ASSERT(msg.GetType() == MSG_SUB);
    Send(successor, &msg);
}
// Pops and returns the oldest queued search result, or NULL when none is
// waiting. The returned event was Clone()d in HandlePublication and is no
// longer tracked by this class once returned.
OMLoadEvent *BroadcastLoadAggregator::ReadEvent()
{
    if (m_RemoteEvents.size() == 0)
	return NULL;

    OMLoadEvent *result = m_RemoteEvents.front();
    m_RemoteEvents.pop_front();
    DB(-5) << "got result for light-load: " << result << endl;
    return result;
}
// One pass of the aggregator's work: expire stale local events, broadcast
// our load estimate, and drain incoming messages.
// NOTE(review): PERIODIC is defined elsewhere; presumably it runs the given
// statement at most once per interval (first argument, time-scaled via
// ScaleTime) -- confirm against the macro definition.
void BroadcastLoadAggregator::DoWork()
{
    TimeVal now;
    OS::GetCurrentTime(&now);

    PERIODIC(ScaleTime<uint32>(100), now, GarbageCollect(now));
    PERIODIC(ScaleTime<uint32>(2000), now, BroadcastLoadEstimate());
    // bound the number of messages handled per call
    ProcessMessages(200);
}
// Polls the network and dispatches incoming messages.
//
// Stops when the network reports no more messages, on a connection error, or
// once `maxMsgs` polls have been made. The network is always polled at least
// once, even when maxMsgs <= 0.
void BroadcastLoadAggregator::ProcessMessages(sint32 maxMsgs) {
    IPEndPoint from((uint32)0, 0);
    Message *msg = 0;
    ConnStatusType status;

    do {
	status = m_Network->GetNextMessage(&from, &msg);

	DBG << "Got ConnectionStatus: " << g_ConnStatusStrings[status]
	    << endl;

	switch (status) {
	case CONN_NEWINCOMING:
	case CONN_OK:
	    // ProcessMessage deletes the message when it is done
	    ProcessMessage(msg);
	    break;
	case CONN_CLOSED:
	    DBG << "connection closed from: " << from.ToString() << endl;
	    break;
	case CONN_ERROR:
	    DBG << "connection error from: " << from.ToString() << endl;
	    // give up for this round
	    return;
	case CONN_NOMSG:
	    break;
	default:
	    Debug::die("Hmm... got weird connection status.");
	    break;
	}

	// Pre-decrement so that exactly maxMsgs polls are made; the previous
	// post-decrement test allowed maxMsgs + 1.
    } while (status != CONN_NOMSG && --maxMsgs > 0);
}
// Dispatches one received message by type and then deletes it:
//   MSG_SUB  -> HandleSubscription
//   MSG_PUB  -> HandlePublication
//   MSG_BLOB -> ProcessLoadEstimate, only when the payload is exactly
//               sizeof(uint32) bytes
// Anything else is logged and dropped.
void BroadcastLoadAggregator::ProcessMessage(Message *m)
{
    if (m->GetType() == MSG_SUB) {
	HandleSubscription(static_cast<MsgSubscription *>(m));
    } else if (m->GetType() == MSG_PUB) {
	HandlePublication(static_cast<MsgPublication *>(m));
    } else if (m->GetType() == MSG_BLOB) {
	MsgBlob *blob = static_cast<MsgBlob *>(m);
	if (blob->len != sizeof(uint32)) {
	    WARN << "Got MsgBlob with wierd size=" << blob->len << endl;
	} else {
	    ProcessLoadEstimate(blob);
	}
    } else {
	WARN << "Got unknown message type: " << m->GetType() << endl;
    }

    // this function owns the message
    delete m;
}
// Records a peer's broadcast load estimate.
//
// The blob payload is the sender's normalized load: a float whose bit pattern
// was converted to network byte order as a 32-bit integer (see
// BroadcastLoadEstimate). Estimates from hosts that are not in m_Map are
// logged and ignored.
void BroadcastLoadAggregator::ProcessLoadEstimate(MsgBlob *msg)
{
    START(OMLoadManager::Broadcast::ProcessLoadEstimate);

    SID from = msg->sender;

    uint32 wire;
    memcpy(&wire, msg->data, sizeof(uint32));
    const uint32 host = ntohl( wire );

    // Reinterpret the bits with memcpy: reading a uint32 object through a
    // float pointer violates the aliasing rules.
    float load;
    memcpy(&load, &host, sizeof(load));

    LoadEstimateMapIter entry = m_Map.find(from);
    if (entry == m_Map.end()) {
	WARN << "got load estimate from unknown host: " << from << endl;
    } else {
	entry->second = load;
    }

    STOP(OMLoadManager::Broadcast::ProcessLoadEstimate);
}
void BroadcastLoadAggregator::BroadcastLoadEstimate()
{
    START(OMLoadManager::Broadcast::BroadcastLoadEstimate);

    SID me = m_Network->GetAppID();
    MsgBlob msg(sizeof(uint32), me);
    float load = m_Manager->GetLoad()/m_Manager->GetCapacity();
    uint32 val = htonl( *((uint32 *)&load) );
    memcpy(msg.data, &val, sizeof(uint32));

    Broadcast(&msg);

    STOP(OMLoadManager::Broadcast::BroadcastLoadEstimate);
}
// Handles a load search travelling around the ring.
//
// If one of our stored local publications overlaps the subscription, it is
// removed from m_LocalEvents, sent straight to the subscription's creator and
// freed. Otherwise the subscription is forwarded to our ring successor,
// unless its hop count exceeds 1024 or the successor is the creator (the
// search has gone all the way around); in those cases it is dropped.
void BroadcastLoadAggregator::HandleSubscription(MsgSubscription *msg)
{
    START(OMLoadManager::Broadcast::HandleSub);

    // neither the previous hop nor the originator should be us
    ASSERT(msg->sender != m_Network->GetAppID());
    ASSERT(msg->GetCreator() != m_Network->GetAppID());
    SID creator = msg->GetCreator();
    Interest *sub = msg->GetInterest();

    // see if we have any local pubs we can match to this sub; the first
    // overlapping one is taken out of the list
    OMLoadEvent *found = NULL;
    for (list<OMLoadEvent *>::iterator it = m_LocalEvents.begin();
	 it != m_LocalEvents.end(); it++) {
	if (sub->Overlaps(*it)) {
	    found = *it;
	    m_LocalEvents.erase(it);
	    break;
	}
    }

    if (found != NULL) {
	// found a match! send it to the creator with whatever lifetime
	// it has left
	TimeVal now;
	OS::GetCurrentTime(&now);
	found->SetLifeTime(MAX(0, found->GetDeathTime() - now));

	SID me = m_Network->GetAppID();
	MsgPublication reply(m_Manager->GetHubID(), me, found, me);

	ASSERT(reply.GetType() == MSG_PUB);
	Send(creator, &reply);
	delete found;
    } else {
	// no match found, route to the next guy on the ring
	SID nextHop = NextInRing();
	if (msg->hopCount > 1024) {
	    WARN << "got sub with high hopCount (dropping): " << msg << endl;
	} else if (nextHop == creator) {
	    // whoops! msg looped all the way around the ring without
	    // finding a match. drop it!
	} else {
	    ASSERT(msg->GetType() == MSG_SUB);
	    msg->sender = m_Network->GetAppID();
	    Send(nextHop, msg);
	}
    }

    STOP(OMLoadManager::Broadcast::HandleSub);
}
// Receives the answer to one of our searches: the event carried by `msg` is
// cloned and queued in m_RemoteEvents for ReadEvent() to hand out.
void BroadcastLoadAggregator::HandlePublication(MsgPublication *msg)
{
    START(OMLoadManager::Broadcast::HandlePub);

    // Got a match for our sub! Keep our own copy of the event.
    OMLoadEvent *received = static_cast<OMLoadEvent *>(msg->GetEvent());
    OMLoadEvent *copy = received->Clone();

    m_RemoteEvents.push_back(copy);

    STOP(OMLoadManager::Broadcast::HandlePub);
}
// Frees and removes every stored local event whose death time is at or
// before `now`.
void BroadcastLoadAggregator::GarbageCollect(const TimeVal& now)
{
    START(OMLoadManager::Broadcast::GarbageCollect);

    // expire stale local events
    list<OMLoadEvent *>::iterator it = m_LocalEvents.begin();
    while (it != m_LocalEvents.end()) {
	if ((*it)->GetDeathTime() <= now) {
	    delete *it;
	    // erase() returns the element after the removed one
	    it = m_LocalEvents.erase(it);
	} else {
	    ++it;
	}
    }

    STOP(OMLoadManager::Broadcast::GarbageCollect);
}

///////////////////////////////////////////////////////////////////////////////

// Creates an idle searcher: not querying, waiting for zero results, and with
// both the last-search and last-publish timestamps set to TIME_NONE.
// m_MinRequired is not set here; BeginQuery() assigns it before any search
// is registered.
OMLoadSearcher::OMLoadSearcher(OMLoadManager *m, OMLoadSampler *s, 
			       OMLoadAggregator *a) : 
    m_Manager(m), m_Sampler(s), m_Aggregator(a), m_IsQuerying(false), 
    m_WaitFor(0), m_LastSearch(TIME_NONE), m_LastQuery(TIME_NONE)
{
}
// Nothing to release: the manager, sampler and aggregator pointers are not
// deleted here (OMLoadManager's destructor deletes the aggregator itself).
OMLoadSearcher::~OMLoadSearcher() 
{
}
void OMLoadSearcher::BeginQuery(float required, uint32 waitfor)
{
    ASSERT(required < MAX_SPARE_CAPACITY);

    bool wasQuerying = m_IsQuerying;

    m_IsQuerying = true;
    m_MinRequired = required;
    m_WaitFor = waitfor;

    /* Actually don't do this because when we get a query result, it may
       take sometime before the app reads it and takes action on it, so
       our load will remain high...
       // force an immediate query if we are starting looking anew
       if (!wasQuerying) {
       RegisterSearch();
       OS::GetCurrentTime(&m_LastSearch);
       }
    */
}
// Leaves querying mode; results already collected are kept.
void OMLoadSearcher::EndQuery()
{
    // DoWork() will stop registering searches
    m_IsQuerying = false;
}
// Number of search results collected and not yet taken via GetResults().
uint32 OMLoadSearcher::NumResults()
{
    const uint32 pending = m_Results.size();
    return pending;
}
// Appends every pending result to *ret (existing contents are kept) and then
// empties the internal result list.
void OMLoadSearcher::GetResults(vector< ref<NodeLoadInfo> > *ret)
{
    uint32 idx = 0;
    while (idx < m_Results.size()) {
	ret->push_back( m_Results[idx] );
	idx++;
    }

    // handed over; start collecting afresh
    m_Results.clear();
}
void OMLoadSearcher::DoWork()
{
    TimeVal now;
    OS::GetCurrentTime(&now);

    if (m_LastQuery + PUBLISH_LOAD_INFO_TIMER <= now) {
	// publish our load if we are below our low water mark
	if (m_Manager->GetLoadStatus() == OMLOAD_LOW) {
	    PublishLoad();
	    m_LastQuery = now;
	}
    }
    if (m_IsQuerying && m_LastSearch + SEARCH_LOAD_INFO_TIMER <= now) {
	RegisterSearch();
	m_LastSearch = now;
    }

    START(OMLoadManager::Searcher::Aggregator::DoWork);
    m_Aggregator->DoWork();
    STOP(OMLoadManager::Searcher::Aggregator::DoWork);

    START(OMLoadManager::Searcher::ProcessResults);
    // see if we got any search results
    OMLoadEvent *ev;
    while ((ev = m_Aggregator->ReadEvent()) != NULL) {
	ref<NodeLoadInfo> info = 
	    new refcounted<NodeLoadInfo>(ev->GetLoadInfo());
	info->Refresh( MIN(NODE_LOAD_INFO_TTL, ev->GetLifeTime()) );
	m_Results.push_back(info);
    }
    // got enough search results, stop
    if (m_Results.size() >= m_WaitFor) {
	m_IsQuerying = false;
    }
    STOP(OMLoadManager::Searcher::ProcessResults);
}
void OMLoadSearcher::PublishLoad()
{
    OMLoadEvent ev(m_Manager->GetHubID(),
		   m_Manager->GetLoadInfo());
    ev.SetLifeTime( NODE_LOAD_INFO_TTL );

    m_Aggregator->SendEvent(&ev);
}
void OMLoadSearcher::RegisterSearch()
{
    ASSERT(m_MinRequired > 0);

    OMLoadInterest in(m_Manager->GetHubID(),
		      m_MinRequired);
    in.SetLifeTime( 0 );

    m_Aggregator->RegisterInterest(&in);
}

///////////////////////////////////////////////////////////////////////////////

// Builds the load manager for the hub named `hubid`.
//   m - owning Manager (supplies the attribute index, mercury node and demux)
//   n - network whose usage is sampled in EstimateLoadParameters()
//   t - which load aggregator implementation to use
// An unknown aggregator type is logged and leaves m_Aggregator NULL.
// Load figures start at zero; the target sits midway between the default
// water marks.
OMLoadManager::OMLoadManager(const string& hubid, Manager *m, RealNet *n,
			     OMLoadAggType t) :
    m_Manager(m), m_Network(n)
{
    m_HubID = m->GetAttrIndex(hubid);

    DBG << "Inited with hubid=" << hubid << " aggr=" << t << endl;

    m_Sampler = new OMLoadSampler(this);

    if (t == OMLOADAGG_MERCURY) {
	m_Aggregator = new MercuryLoadAggregator(m_HubID,
						 m->GetMerc(),
						 m->GetDemux(),
						 m_Sampler);
    } else if (t == OMLOADAGG_BROADCAST) {
	m_Aggregator = new BroadcastLoadAggregator(this);
    } else {
	WARN << "unknown load aggregator requested: " << t << endl;
	// Never leave the pointer indeterminate: it is handed to the
	// searcher below and deleted in the destructor.
	m_Aggregator = NULL;
    }

    m_Searcher = new OMLoadSearcher(this, m_Sampler, m_Aggregator);

    m_Load          = 0;
    m_LoadActual    = 0;
    m_HighWaterMark = HIGH_WATER_MARK;
    m_LowWaterMark  = LOW_WATER_MARK;
    m_LoadTarget    = (HIGH_WATER_MARK-LOW_WATER_MARK)/2 + LOW_WATER_MARK;
    m_WindowTarget  = HIGH_WATER_MARK-LOW_WATER_MARK;

    // until a real estimate arrives, assume the system looks like our
    // own defaults (normalized by capacity)
    m_SystemLoadMean   = m_LoadTarget/CAPACITY;
    m_SystemLoadStdDev = m_WindowTarget/2/CAPACITY;
}
// Deletes the searcher and then the aggregator it points at. The sampler is
// deliberately not freed (see the XXX note below).
OMLoadManager::~OMLoadManager()
{
    // XXX can't delete the sampler, no way to unregister it from Merc
    //delete m_Sampler;
    delete m_Searcher;
    delete m_Aggregator;
}

// Returns a snapshot of this node's load: its SID, the (non-negative)
// distances from the current load to the water marks, and the load itself.
// The snapshot lives in a function-local static, so the returned reference
// is overwritten by the next call.
// NOTE(review): the arguments are passed as (high - load, low - load), while
// NodeLoadInfo's wire format and Packet constructor order the fields as
// toLowWaterMark then toHighWaterMark -- confirm this matches the parameter
// order of the value constructor declared in the header.
const NodeLoadInfo& OMLoadManager::GetLoadInfo() const
{
    static NodeLoadInfo info;
    info = NodeLoadInfo(m_Manager->GetSID(),
			MAX(m_HighWaterMark - m_Load, 0),
			MAX(m_LowWaterMark - m_Load, 0),
			m_Load);

    return info;
}
// Classifies the current load against the water marks and the target:
//   below the low water mark    -> OMLOAD_LOW
//   above the high water mark   -> OMLOAD_HIGH
//   otherwise, below the target -> OMLOAD_MID_LOW
//   otherwise                   -> OMLOAD_MID_HIGH
OMLoadStatus OMLoadManager::GetLoadStatus() const {
    if (GetLoad() < GetLowWaterMark())
	return OMLOAD_LOW;

    if (GetLoad() > GetHighWaterMark())
	return OMLOAD_HIGH;

    // inside the window: report which side of the target we are on
    return (GetLoad() < GetLoadTarget()) ? OMLOAD_MID_LOW : OMLOAD_MID_HIGH;
}
void OMLoadManager::Record(const NodeLoadInfo& info)
{
    NodeLoadMapIter p = m_LoadMap.find(info.sid);
    if (p == m_LoadMap.end()) {
	ref<NodeLoadInfo> rcd = new refcounted<NodeLoadInfo>(info);
	m_LoadMap.insert(pair< SID,ref<NodeLoadInfo> >(info.sid, rcd));
    } else {
	// save it if its TTL is longer than what we have
	// otherwise assume what we have is newer
	if (p->second->TimeLeft() < info.TimeLeft())
	    *p->second = info;
    }
}
// One pass of the load manager's work loop: expire stale node records,
// re-estimate load parameters, start or stop searching for lightly loaded
// nodes based on our load status, run the searcher, record its results, and
// optionally log the accuracy of the estimate.
// NOTE(review): PERIODIC is defined elsewhere; presumably it runs its last
// argument at most once per interval (first argument) -- confirm against the
// macro definition.
void OMLoadManager::DoWork()
{

    TimeVal now;
    OS::GetCurrentTime(&now);

    // expire soft state several times per record TTL
    PERIODIC(NODE_LOAD_INFO_TTL/SOFTSTATE_EXPIRE_GRAINULARITY, now,
	     GarbageCollect());

    PERIODIC(LOAD_ESTIMATION_TIMER, now,
	     EstimateLoadParameters());

    if ( GetLoadStatus() == OMLOAD_HIGH ) {
	// we are overloaded, begin searching for lightly loaded dudes
	// (asking for enough spare capacity to bring us back to target)
	m_Searcher->BeginQuery( GetLoad() - GetLoadTarget() );
    } else if ( GetLoadStatus() == OMLOAD_MID_LOW ||
		GetLoadStatus() == OMLOAD_LOW ) {
	// if we were previously searching for a lightly loaded dude
	// stop if we get below the target (assuming we never found a guy)
	// don't stop just when we return to below high water otherwise
	// we might oscillate
	m_Searcher->EndQuery();
    }
    // OMLOAD_MID_HIGH falls through: the searcher's state is left as it is

    START(OMLoadManager::Searcher::DoWork);
    m_Searcher->DoWork();
    STOP(OMLoadManager::Searcher::DoWork);

    START(OMLoadManager::ProcessSearchResults);
    ProcessSearchResults();
    STOP(OMLoadManager::ProcessSearchResults);

    if (ENABLE_LOADESTIMATEACCURACYLOG) {
	PERIODIC(500, now, {
	    // all local figures are logged normalized by capacity
	    LoadEstimateAccuracyEntry e;
	    e.localLoad        = m_Load/GetCapacity();
	    e.localLoadActual  = m_LoadActual/GetCapacity();
	    e.localTarget      = m_LoadTarget/GetCapacity();
	    e.localMargin      = m_WindowTarget/2/GetCapacity();
	    e.systemLoadMean   = m_SystemLoadMean;
	    e.systemLoadStdDev = m_SystemLoadStdDev;

	    LOG(LoadEstimateAccuracyLog, e);
	});
    }
}

void OMLoadManager::GarbageCollect()
{
    START(OMLoadManager::GarbageCollect);

    DBG << "garbage collecting expired node load info" << endl;

    TimeVal now;
    OS::GetCurrentTime(&now);

    for (NodeLoadMapIter it = m_LoadMap.begin();
	 it != m_LoadMap.end(); ) {
	if (it->second->TimedOut(now)) {
	    NodeLoadMapIter oit = it;
	    oit++;
	    m_LoadMap.erase(it);
	    it = oit;
	} else {
	    it++;
	}
    }

    STOP(OMLoadManager::GarbageCollect);
}
// Asks the aggregator to write a fresh system-wide estimate into
// m_SystemLoadMean and m_SystemLoadStdDev, and returns the aggregator's
// result.
bool OMLoadManager::RecalculateSystemLoad()
{
    const bool updated =
	m_Aggregator->EstimateSystemLoad(&m_SystemLoadMean,
					 &m_SystemLoadStdDev);
    return updated;
}
// Re-estimates this node's load and, when a system-wide estimate is
// available, re-derives the load target and the high/low water marks.
//
// Steps, as implemented below:
//  1. Predict load from the object store: the fixed cost of every
//     non-replica object, plus one delta cost for each registered replica
//     that has at least one interest.
//  2. Fold the prediction into m_Load, and the measured outbound network
//     usage into m_LoadActual, as moving averages weighted by
//     LOAD_EMA_WEIGHT.
//  3. If RecalculateSystemLoad() succeeds, move m_LoadTarget toward the
//     system mean (scaled by our capacity), size the window, and recentre
//     the water marks around the target.
void OMLoadManager::EstimateLoadParameters()
{
    START(OMLoadManager::EstimateLoadParameters);

    DBG << "reestimating local load parameters" << endl;

    CostMetric objectCost = 0;
    CostMetric deltaCost  = 0;
    ObjectStore *store = m_Manager->GetObjectStore();
    GObject *obj;

    // iterate over every object in the store
    store->Begin();
    while ( (obj = store->Next()) != NULL) {
	// replicas are skipped; only non-replica objects are counted
	if (!obj->IsReplica()) {
	    // cost to client
	    objectCost += obj->GetFixedCost();

	    // cost to interested servers
	    GObjectInfo *info = static_cast<GObjectInfo *>(obj->GetInfo());
	    ASSERT(info);
	    for (ReplicaPtrMapIter rit = info->GetRegisteredReplicas()->begin(); rit != info->GetRegisteredReplicas()->end(); rit++) {
		// a replica with no interests adds no delta cost
		if ( rit->second.GetNumInterests() > 0 ) {
		    deltaCost += obj->GetDeltaCost();
		}
	    }

	}
    }

    // Mercury also has cost, but it does its own load balancing

    TimeVal now;
    OS::GetCurrentTime(&now);

    // Only count outbound costs -- not sure what mixing + matching does...
    // Still read the inbound costs though, since otherwise realnet will
    // never cleanup after itself.
    CostMetric  inbound = m_Network->GetInboundUsage(now);
    CostMetric outbound = m_Network->GetOutboundUsage(now);

    DB(5) << " inbound=" << inbound
	  << " outbound=" << outbound
	  << " predict=" << (objectCost + deltaCost)
	  << " objcost=" << objectCost 
	  << " deltacost=" << deltaCost << endl;

    CostMetric new_load_val = objectCost + deltaCost;

    // predicted load, smoothed
    m_Load = 
	LOAD_EMA_WEIGHT*m_Load + (1-LOAD_EMA_WEIGHT)*new_load_val;

    // measured outbound usage, smoothed the same way
    m_LoadActual =
	LOAD_EMA_WEIGHT*m_LoadActual + (1-LOAD_EMA_WEIGHT)*outbound;

    ///////////////////////////////////////////////////////////////////////////

    CostMetric load = m_Load;

    DB(5) << "=========" << endl;
    DB(5) << "curr_load=" << load << " target=" << m_LoadTarget << endl;

    DB(5) << "curr_hw=" << GetHighWaterMark() << endl;
    DB(5) << "curr_lw=" << GetLowWaterMark() << endl;

    // dynamically recalculate water marks
    if ( RecalculateSystemLoad() ) {
	// system figures are normalized; scale them by our capacity
	CostMetric sysAvgLoad = m_SystemLoadMean*GetCapacity();
	CostMetric sysStdLoad = m_SystemLoadStdDev*GetCapacity();

	DB(5) << "sys_avg_load="  << sysAvgLoad
	      << " sys_std_load=" << sysStdLoad << endl;

	// drift the target toward the system average
	m_LoadTarget = 
	    LOAD_EMA_WEIGHT*m_LoadTarget + (1-LOAD_EMA_WEIGHT)*sysAvgLoad;

	// window is proportional to the target, with a lower bound
	m_WindowTarget = MAX(LOAD_MIN_WINDOW, 
			     2*m_LoadTarget*LOAD_WINDOW_ALPHA);

	// water marks sit half a window either side of the target
	m_HighWaterMark = m_LoadTarget + m_WindowTarget/2;
	m_LowWaterMark  = m_LoadTarget - m_WindowTarget/2;

	DB(5) << "new_target=" << m_LoadTarget << endl;
	DB(5) << "new_window=" << m_WindowTarget << endl;
	DB(5) << "new_hw=" << GetHighWaterMark() << endl;
	DB(5) << "new_lw=" << GetLowWaterMark() << endl;
    }

    STOP(OMLoadManager::EstimateLoadParameters);
}
void OMLoadManager::ProcessSearchResults()
{
    DBG << "processing load search results" << endl;

    if (m_Searcher->NumResults() > 0) {
	vector< ref<NodeLoadInfo> > res;
	m_Searcher->GetResults( &res );
	ASSERT(res.size() > 0);
	for (uint32 i=0; i<res.size(); i++) {
	    Record( *res[i] );
	}
    }
}
// vim: set sw=4 sts=4 ts=8 noet: 
// Local Variables:
// Mode: c++
// c-basic-offset: 4
// tab-width: 8
// indent-tabs-mode: t
// End:
