#! /usr/bin/python

import matplotlib
from pylab import *
import sys, urllib, re
import getopt

# Patterns for scraping the task-list page, matched one HTML line at a time.
#
# _rtip captures the task id (tip_<digits>_<digits>_<one word char>_<digits>)
# from a table cell that links to a "taskdetails..." page.
_rtip = re.compile(r'<td><a href="taskdetails[^"]*">(tip_\d+_\d+_\w_\d+)</a></td>')
# _rstart captures (day, 3-char month, year, hour, minute, second) from a
# "<td>D-Mon-YYYY HH:MM:SS<br/></td>" cell.
_rstart = re.compile(r'<td>(\d?\d)-(\w\w\w)-(\d\d\d\d) (\d\d):(\d\d):(\d\d)<br/></td>')
# _rend captures the same six fields plus a seventh group holding the
# parenthesised duration text ("Nsec" or "Nmins, Nsec").
_rend = re.compile(r'<td>(\d?\d)-(\w\w\w)-(\d\d\d\d) (\d\d):(\d\d):(\d\d) \(((?:\d+mins, )?\d+sec)\)<br/></td>')

# Variants whose timestamps look like "D/DD HH:MM:SS" with no year and no
# trailing <br/>.  The names suggest a job-history page -- TODO confirm, and
# confirm which of the two leading fields is the month.  None of the three is
# referenced by the code visible in this file.
_rtipHistory = _rtip
_rstartHistory = re.compile(r'<td>(\d?\d)/(\d\d) (\d\d):(\d\d):(\d\d)</td>')
_rendHistory = re.compile(r'<td>(\d?\d)/(\d\d) (\d\d):(\d\d):(\d\d) \(((?:\d+mins, )?\d+sec)\)</td>')

def secondOfDay (hms):
    """Convert an (hour, minute, second) sequence to the second of day, counting from midnight.

    hms must have exactly three elements (checked with an assertion).  Each
    element may be a number or a numeric string; string elements are
    converted with int() individually, so mixed sequences work too.

    Returns hour*3600 + minute*60 + second.
    """
    assert(len(hms) == 3)
    # type(u'') is unicode on Python 2 and plain str on Python 3, so this
    # tuple covers every text type without naming `unicode`/`basestring`.
    stringTypes = (type(''), type(u''))
    # Decide per element instead of looking only at hms[0], and build a real
    # list so the result can be unpacked even where map() is lazy.
    hours, minutes, seconds = [int(part) if isinstance(part, stringTypes) else part
                               for part in hms]
    return hours*3600 + minutes*60 + seconds

def parseStatsPage (url):
    """Scrape given URL and return a list of (tip, starttime, endtime) tuples.

    The page is read line by line.  A line matching _rtip opens a task
    record, a following _rstart line supplies its start time and a following
    _rend line supplies its end time and closes the record.

    tip is the task id string; starttime and endtime are seconds since
    midnight computed from the hour/minute/second fields only.  The date
    fields are ignored, so a task that runs past midnight gets an endtime
    smaller than its starttime.

    Raises AssertionError if the page does not follow the expected
    tip -> start -> end ordering, plus whatever urllib.urlopen raises.
    """
    stats = []
    inTip = False
    fp = urllib.urlopen(url)
    try:
        for l in fp:
            mtip = _rtip.findall(l)
            mstart = _rstart.findall(l)
            mend = _rend.findall(l)
            if mtip:
                assert(not inTip)
                inTip = True
                tipName = mtip[0]
            if mstart:
                assert(inTip)
                # Groups 3..5 of the match are hour, minute, second.
                startTime = secondOfDay(mstart[0][3:6])
            if mend:
                assert(inTip)
                inTip = False
                endTime = secondOfDay(mend[0][3:6])
                stats.append((tipName, startTime, endTime))
    finally:
        # Close the connection even when an assertion or parse error aborts
        # the loop; previously the handle leaked on any exception.
        fp.close()
    return stats

def parseJobStats (trackerUrl, jobId):
    """Return the (tip, starttime, endtime) tuples for every task of a job.

    Fetches the first page of the job's map-task list and then the first
    page of its reduce-task list from the tracker at trackerUrl; map entries
    come first in the returned list.
    """
    urlParams = (trackerUrl, jobId)
    pageUrls = (
        "%s/jobtasks.jsp?jobid=%s&type=map&pagenum=1" % urlParams,
        "%s/jobtasks.jsp?jobid=%s&type=reduce&pagenum=1" % urlParams,
    )
    collected = []
    for pageUrl in pageUrls:
        collected.extend(parseStatsPage(pageUrl))
    return collected
    
def printUsage ():
    """Write the command-line usage summary to standard error."""
    usageLines = (
        "Usage:  timeline.py [options] <jobId>",
        "  -t, --tracker <trackerUrl> ",
        "  -q, --quiet ",
        "  -h, --help ",
    )
    for usageLine in usageLines:
        sys.stderr.write(usageLine + "\n")
    
def main(args):
    """Parse the command line, scrape the job's task timings and plot them.

    args is the argument list without the program name.  Recognised options:
      -t, --tracker <trackerUrl>  base URL of the job tracker to scrape
      -q, --quiet                 do not label each bar with its task id
      -h, --help                  print usage and exit with status 0
    Exactly one positional argument, the job id, is required.

    Draws one horizontal bar per task (green for tasks whose type letter is
    'm', blue otherwise) spanning its start to end second-of-day, then shows
    the figure.  Exits with status -1 on invalid usage.
    """
    trackerUrl = 'http://b0622b01e1.hny.distillery.ibm.com:50030'
    annotateWithText = True

    try:
        opts, args = getopt.getopt(args, "t:qh", ["tracker=", "quiet", "help"])
    except getopt.GetoptError as err:
        # Unknown options or missing option arguments are reported by getopt
        # itself; show the message and the usage instead of a traceback.
        sys.stderr.write("%s\n" % err)
        printUsage()
        sys.exit(-1)
    for opt, arg in opts:
        if opt in ("-t", "--tracker"):
            trackerUrl = arg
        elif opt in ("-q", "--quiet"):
            annotateWithText = False
        elif opt in ("-h", "--help"):
            printUsage()
            sys.exit(0)
        else:
            # Defensive only: getopt rejects unknown options before we get
            # here.  (The old code called the non-existent sys.err.println.)
            sys.stderr.write("Unrecognized option: " + opt + "\n")
            printUsage()
            sys.exit(-1)

    if len(args) != 1:
        printUsage()
        sys.exit(-1)
    jobId = args[0]

    stats = parseJobStats(trackerUrl, jobId)

    # The task-type letter is read eight characters from the end of the id,
    # which assumes the id ends in "<type>_<6 digits>" -- TODO confirm that
    # the numeric suffix is always six digits.
    colorCodes = ['g' if t[0][-8] == 'm' else 'b' for t in stats]
    rects = bar(bottom=list(range(1, len(stats)+1)),
                width=[t[2]-t[1] for t in stats],
                height=0.95, left=[t[1] for t in stats],
                color=colorCodes, edgecolor=colorCodes,
                orientation='horizontal')
    if annotateWithText:
        for rect, stat in zip(rects, stats):
            text(rect.get_x() + 0.5, rect.get_y()+0.05, stat[0], color='y', fontsize=10)
    title('%s timeline' % jobId)
    axis('tight')

    show()

# Script entry point: pass the command-line arguments, minus the program
# name, to main().
if __name__ == '__main__':
    main(sys.argv[1:])
