#!/usr/bin/perl
#
# This script is used to generate dg/dg_delta_tables.{h,c} which are used
# to determine which bits in the delta bitmask are assigned to which fields
# in the edict. We do delta encoding as follows:
#
# * We group fields that are always altered together and call them a
#   "cluster."
# * We take the clusters and assign them 1 bit in
#   the DeltaMask that is passed to {Un}PackUpdate(...) in order of frequency.
#
# The script takes as input a set of field clusters annotated with a count.
# For example:
#
# 100\ts.origin,s.old_origin
# 200\tclient
# ...
#
# The count represents the frequency of occurrence of the cluster in deltas.
# NOTE: There can only be MAX_DELTA_FIELDS (DeltaMask.h) clusters.

use strict;

# ( [ count, [ fields... ] ], ... )
my @clusters;

# Read the cluster list from the files named on the command line, or from
# STDIN when none are given. Each line is "<count><whitespace><f1>,<f2>,...".
while (my $line = <>) {
    chomp $line;

    # A blank line carries no cluster. Without this guard it would be stored
    # as a cluster with an undefined count and no fields, and would still get
    # an entry in the generated tables.
    next unless $line =~ /\S/;

    # split ' ' (awk mode) splits on runs of whitespace like /\s+/ does, but
    # also discards leading whitespace, so an indented line does not end up
    # with an empty count and the count in the fields column.
    my ($count, $fields) = split(' ', $line);
    my @fields = defined $fields ? split(/,/, $fields) : ();
    push @clusters, [ $count, \@fields ];
}

# Descending numeric order by count: the position of a cluster in this list
# is the index later written into the generated tables.
@clusters = sort { $b->[0] <=> $a->[0] } @clusters;
my $num_clusters = scalar(@clusters);

print STDERR "num_clusters: $num_clusters\n";

# Generate the C header. NUM_FIELDS is defined as one more than the number of
# clusters read from the input.
my $header_path = "dg/dg_delta_tables.h";
open(my $header_fh, '>', $header_path)
    or die "Cannot open $header_path for writing: $!";

print_header($header_fh);
print {$header_fh} <<EOT;

#ifndef _DG_DELTA_TABLES_H_
#define _DG_DELTA_TABLES_H_

#include <dg/dg_delta_encoding.h>

#define NUM_FIELDS  ($num_clusters+1)

extern dg_cluster_t dg_clusters[];

#endif
EOT

# Buffered write errors (e.g. a full disk) only surface at close time, so a
# failed close must abort rather than leave a truncated header behind.
close($header_fh) or die "Cannot close $header_path: $!";

# Generate the C source: one key array per cluster, followed by the
# dg_clusters[] table that refers to those arrays by name.
my $source_path = "dg/dg_delta_tables.c";
open(my $source_fh, '>', $source_path)
    or die "Cannot open $source_path for writing: $!";

print_header($source_fh);
# NOTE(review): <cstdlib> is a C++ header although the output file is named
# .c -- presumably the generated file is compiled as C++; confirm.
print {$source_fh} <<EOT;

#include <cstdlib>
#include <dg/dg_delta_tables.h>

EOT

# The per-cluster arrays are written first because the table entries below
# reference them as CLUSTER_<index>.
for my $i (0 .. $#clusters) {
    print_cluster($source_fh, $i, $clusters[$i]);
}

print {$source_fh} "dg_cluster_t dg_clusters[] = {\n";

for my $i (0 .. $#clusters) {
    print_field($source_fh, $i, $clusters[$i]);
}

# One extra, empty entry follows the real clusters.
print_empty_field($source_fh);
print {$source_fh} "};\n";

# Buffered write errors only surface at close time; do not exit successfully
# with a truncated source file.
close($source_fh) or die "Cannot close $source_path: $!";

###############################################################################

# Return the numerically smaller of two values.
#
#   min($first, $second)
#
# When the two compare equal, the second argument is the one returned.
sub min {
    my ($first, $second) = @_;
    return $first if $first < $second;
    return $second;
}

# Write the "automatically generated" banner to a filehandle.
#
#   $fp - filehandle (glob or handle reference) that receives the banner
#
# The banner is a C block comment that names MakeClusterTables.pl as the
# generator and records the current local time.
sub print_header {
    my ($fp) = @_;

    # Assigning localtime to a scalar yields the human-readable date string.
    my $generated_at = localtime();

    my @banner = (
        "/*",
        " * THIS FILE IS AUTOMATICALLY GENERATED FROM MakeClusterTables.pl.",
        " * DO NOT EDIT BY HAND!!!",
        " *",
        " * Generated on: $generated_at",
        " */",
    );

    # The trailing empty element makes join() terminate the last line too.
    print {$fp} join("\n", @banner, "");
}

# Write the C definitions for a single cluster.
#
#   $fp      - filehandle that receives the generated C code
#   $index   - position of the cluster; used in the generated identifiers
#   $cluster - array reference [ count, [ field names... ] ]
#
# For every field name one "static dg_field_key_t CLUSTER_<index>_FIELD_<n>"
# definition is written, followed by a "CLUSTER_<index>[]" array listing
# those definitions in order, and a blank line. The cluster is not modified.
sub print_cluster {
    my ($fp, $index, $cluster) = @_;

    my $prefix = "CLUSTER_${index}";
    my @names  = @{ $cluster->[1] };
    my @keys;

    for my $pos (0 .. $#names) {
        my $key = "${prefix}_FIELD_$pos";
        push @keys, $key;

        # The field name is emitted as a C string literal (no escaping).
        print {$fp} "static dg_field_key_t $key = {\"$names[$pos]\", 0 };\n";
    }

    print {$fp} "static dg_field_key_t ${prefix}\[\] = { "
        . join(", ", @keys)
        . " };\n\n";
}

# Write one row of the dg_clusters[] table.
#
#   $fp      - filehandle that receives the generated C code
#   $index   - position of the cluster; also names its CLUSTER_<index> array
#   $cluster - array reference [ count, [ field names... ] ]
#
# The row has the form "{ <index>, <number of fields>, CLUSTER_<index> },"
# with the cluster's count appended as a trailing C++-style comment.
sub print_field {
    my ($fp, $index, $cluster) = @_;
    my ($count, $names) = @{$cluster};

    # Only the number of fields is needed here; the names themselves are
    # written by print_cluster.
    my $num_fields = scalar @{$names};

    print {$fp} "    { ", join(", ", $index, $num_fields, "CLUSTER_$index"),
        " },     // $count\n";
}

# Write the empty row that follows the real entries of dg_clusters[].
#
#   $fp - filehandle that receives the generated C code
#
# Rows written by print_field carry three initializers (index, number of
# fields, key array), so the empty row spells out all three as well. With only
# "{ 0, NULL }" the NULL would initialize the numeric field-count member
# rather than the array pointer.
sub print_empty_field {
    my ($fp) = @_;
    print {$fp} "    { 0, 0, NULL },\n";
}
