#!/usr/bin/python
# Filename: PreCalculation.py
#Yifan Xue

import sys
import pickle
import os

sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/lib")
from Parsing import NULLDOM

def sanityChecks(path):
    """Verify that the required pickled count files exist under path/formatted.

    The files checked, in order, are 'domainCounts-domArchs.pkl',
    'doubleCounts-domArchs.pkl' and 'tripleCounts-domArchs.pkl'.

    path -- directory that contains the 'formatted' sub-directory.

    Returns None when all files exist. On the first missing file a message is
    printed and the process exits via SystemExit with status 1, so that shells
    and calling scripts can detect the failure (a status of 0 would signal
    success).
    """
    requiredFiles = (
        'domainCounts-domArchs.pkl',
        'doubleCounts-domArchs.pkl',
        'tripleCounts-domArchs.pkl',
    )
    for fileName in requiredFiles:
        if not os.path.exists(path + '/formatted/' + fileName):
            print("'{}' file does not exist in '{}'/formatted!".format(fileName, path))
            print("Exiting...")
            # Non-zero status: a missing input file is an error, not a success.
            sys.exit(1)


#===================================#
################Main#################
#===================================#

if __name__ == "__main__":
    # Script entry point.
    #
    # Usage: PreCalculation.py <data directory>
    #
    # Reads the single/double/triple domain count dictionaries pickled under
    # <data directory>/formatted, derives five lookup tables from them and
    # writes each table back to the same directory as a .pkl and a .txt file.

    # The data directory is the only command-line argument; fail with a usage
    # message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: {} <data directory>\n".format(sys.argv[0]))
        sys.exit(1)
    formattedDir = sys.argv[1] + "/formatted"

    # Check that the input files exist
    print("Sanity checks...")
    sanityChecks(sys.argv[1])

    print("Reading files...")
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only run this script on trusted, locally generated .pkl files.
    # Each file is read inside a 'with' block so it is always closed.
    # Read the single domain file
    with open(formattedDir + "/domainCounts-domArchs.pkl", "rb") as singleFile:
        singleDom = pickle.load(singleFile)
    # Read the double domain file
    with open(formattedDir + "/doubleCounts-domArchs.pkl", "rb") as doubleFile:
        doubleDom = pickle.load(doubleFile)
    # Read the triple domain file
    with open(formattedDir + "/tripleCounts-domArchs.pkl", "rb") as tripleFile:
        tripleDom = pickle.load(tripleFile)

    # Initialization
    print("Initializing...")
    # Every real domain (everything in singleDom except the 'sum' entry and
    # the null domain), in singleDom's iteration order.
    realDomains = [key for key in singleDom
                   if not (key == 'sum' or key == NULLDOM)]
    # Domains only ever observed as singletons; starts with every real domain
    # and is pruned below.
    onlySingleton = dict((key, 1) for key in realDomains)
    # Occurrence count of double domains grouped by their first domain
    # (second position flexible).
    doubleEndCount = {}
    # Per first domain: number of doubleEndCount keys minus the number of
    # distinct doubleDom entries that start with that domain.
    doubleEndType = {}
    # For each (first, last) domain pair: list of the middle domains seen
    # between them in tripleDom.
    tripleMiddleDomain = {}

    # Processing
    print("Pre-processing...")
    # For onlySingleton: any domain appearing in a triple that is not a pure
    # singleton (null domain on both sides) is not singleton-only.
    for key in tripleDom:
        if key == 'sum':  # Skip the 'sum' entry
            continue
        if key[0] == NULLDOM and key[2] == NULLDOM:  # Skip singletons
            continue
        # The null domain was never inserted into onlySingleton, so popping
        # all three positions is safe whichever side (if any) is null.
        for dom in key[:3]:
            onlySingleton.pop(dom, None)

    # For startDomain: every possible first inserted domain with its count,
    # plus a 'sum' entry holding the total of those counts.
    startDomain = dict((key, singleDom[key]) for key in realDomains)
    startDomain['sum'] = sum(singleDom[key] for key in realDomains)

    # For doubleEndCount and doubleEndType
    for key in doubleDom:
        if key == 'sum':  # Skip the 'sum' entry
            continue
        # NOTE(review): the leading '1' tag is part of the key format expected
        # by the consumers of these tables -- meaning not visible here.
        key2 = ('1', key[0])
        doubleEndCount[key2] = doubleEndCount.get(key2, 0) + doubleDom[key]
        doubleEndType[key2] = doubleEndType.get(key2, 0) + 1
    distinctDomainNumber = len(doubleEndCount)
    for key in doubleEndType:
        doubleEndType[key] = distinctDomainNumber - doubleEndType[key]

    # For tripleMiddleDomain
    for key in tripleDom:
        if key == 'sum':  # Skip the 'sum' entry
            continue
        key2 = ('1', key[0], key[2])
        tripleMiddleDomain.setdefault(key2, []).append(key[1])

    # Write result to files. Every table is written twice: pickled for later
    # pipeline steps and as tab-separated text.
    print("Writing results to files...")
    # onlySingleton (text file lists the keys only)
    print("We have got {} of onlySingleton".format(len(onlySingleton)))
    with open(formattedDir + '/onlySingleton.pkl', 'wb') as onlySingletonFile:
        pickle.dump(onlySingleton, onlySingletonFile)
    with open(formattedDir + '/onlySingleton.txt', 'w') as outPutFile:
        for key in onlySingleton:
            outPutFile.write(str(key) + '\n')
    # startDomain (the 'sum' entry is not counted as a start domain)
    print("We have got {} of start domains".format(len(startDomain)-1))
    with open(formattedDir + '/startDomain.pkl', 'wb') as stdFile:
        pickle.dump(startDomain, stdFile)
    with open(formattedDir + '/startDomain.txt', 'w') as outPutFile:
        for key in startDomain:
            outPutFile.write(str(key) + '\t' + str(startDomain[key]) + '\n')
    # doubleEndCount
    print("We have got {} types of D-X including NULLDOM".format(len(doubleEndCount)))
    with open(formattedDir + '/doubleEndCount.pkl', 'wb') as dblEndFile:
        pickle.dump(doubleEndCount, dblEndFile)
    with open(formattedDir + '/doubleEndCount.txt', 'w') as outPutFile:
        for key in doubleEndCount:
            outPutFile.write(str(key) + '\t' + str(doubleEndCount[key]) + '\n')
    # doubleEndType
    with open(formattedDir + '/doubleEndType.pkl', 'wb') as dblEndTFile:
        pickle.dump(doubleEndType, dblEndTFile)
    with open(formattedDir + '/doubleEndType.txt', 'w') as outPutFile:
        for key in doubleEndType:
            outPutFile.write(str(key) + '\t' + str(doubleEndType[key]) + '\n')
    # tripleMiddleDomain
    print("We have got {} of D1-X-D2".format(len(tripleMiddleDomain)))
    with open(formattedDir + '/tripleMiddleDomain.pkl', 'wb') as trplMiddleFile:
        pickle.dump(tripleMiddleDomain, trplMiddleFile)
    with open(formattedDir + '/tripleMiddleDomain.txt', 'w') as outPutFile:
        for key in tripleMiddleDomain:
            outPutFile.write(str(key) + '\t' + str(tripleMiddleDomain[key]) + '\n')

    print("done")
