/* Copyright (c) 1993 by The Johns Hopkins University */


/* 

 PEBLS:  Exemplar-Based Learning System

 For more information, contact:				

 Steven Salzberg  (salzberg@cs.jhu.edu)
 Dept. of Computer Science
 Johns Hopkins University
 Baltimore, MD  21210

*/




/* PEBLS.H:  DEFINED CONSTANTS, TYPES, AND FUNCTION PROTOTYPES */






/* ============================================================ */
/* DEFINED CONSTANTS (DO NOT MODIFY) 				*/


					/* Basic Constants */
#define ON		1
#define OFF		0
#define TRUE		1
#define FALSE		0

/*
 * INFINITY is also a standard macro of the C99 <math.h>.  Any earlier
 * definition is dropped first so that including <math.h> before this header
 * does not produce a macro-redefinition diagnostic; the value used by PEBLS
 * stays 99999.9.
 * NOTE(review): a <math.h> included *after* this header may still replace
 * this definition -- include <math.h> first.
 */
#ifdef INFINITY
#undef INFINITY
#endif
#define INFINITY  	99999.9

/*
 * Negative replacement lists are parenthesized so the macros expand as a
 * single operand in any surrounding expression.
 */
#define UNKNOWN		(-1)
#define CHOOSE_ANY      (-1)
#define NOOP		(-1)



/* Output flags */
#define PCOUNT  0
#define NCOUNT  1
#define UCOUNT  2
#define OCOUNT  3

					/* PEBLS Operating Modes */
#define TEST                    11
#define CLASSIFY                12

/* Data formats */
#define STANDARD                13
#define SUBUNITS                14

/* Train & test modes */
#define SUBSET                  15
#define SPECIFIED_GROUP         16
#define LEAVE_ONE_OUT           17

/* Output modes */
#define AVERAGES_ONLY           18
#define DETAILED                19
#define COMPLETE                20

					/* Nearest neighbor voting */
#define MAJORITY                60
#define WEIGHTED_DISTANCE       61
#define THRESHOLD               62

/* Exemplar weighting methods */
#define USED_CORRECT            70
#define ONE_PASS                71
#define INCREMENT               72
#define USER_EXEMPLAR_1         73
#define USER_EXEMPLAR_2         74
#define USER_EXEMPLAR_3         75

/* Feature weight standards */
#define TRIANGLE                80
#define GENETIC                 81
#define USER_DEFINED            82
#define USER_FEATURE_1          83
#define USER_FEATURE_2          84
#define USER_FEATURE_3          85

/* Post processing methods (93 and 94 are not assigned in this list) */
#define PROTEIN_STANDARD        90
#define PROTEIN_SMOOTH          91
#define PROTEIN_SMOOTH_ONLY     92
#define USER_POSTPROC_1         95
#define USER_POSTPROC_2         96
#define USER_POSTPROC_3         97



					/* ERROR CODES */

/* Error codes are numbered consecutively from 101 through 123. */
#define OP_MODE_ERR             101
#define USAGE_ERR               102
#define TRAIN_SIZE_ERR          103
#define TRIALS_ERR              104
#define CLASSES_ERR             105
#define INSTANCES_ERR           106
#define NO_TRAIN_ERR            107
#define UNK_CONST_ERR           108
#define UNK_EWEIGHT_ERR         109
#define UNK_FWEIGHT_ERR         110
#define UNK_POSTPROC_ERR        111
#define K_NEIGHBOR_ERR          112
#define K_NEIGHBOR2_ERR         113
#define GENETIC_ERR             114
#define DATAFILE_ERR            115
#define UNDECLARED_VALUE_ERR    116
#define UNDECLARED_CLASS_ERR    117
#define UNK_PARAMETER_ERR       118
#define VOTING_ERR              119
#define FEATURES_ERR            120
#define VALUES_ERR              121
#define FEATURE_VALUE_ERR       122
#define FEATURE_WEIGHT_ERR      123


/* ============================================================ */
/* DEFINED TYPES						*/



/*
 * INSTANCE TYPE
 *
 * One data instance: its identifier, feature values, class labels and
 * per-instance bookkeeping.  ID_LENGTH_MAX and FEATURES_MAX are not defined
 * in the visible part of this header and must be defined before it is
 * included.
 */
typedef struct
{
    char  id[ID_LENGTH_MAX];        /* Instance ID */
    int   value[FEATURES_MAX];      /* Feature values */

    int   class_true;               /* True class */
    int   class_nearest;            /* Class of nearest neighbor */
    int   class_pp;                 /* Class after post processing */

    /* Weighting parameters */
    float weight;
    int   correct;
    int   used;
    int   weighted;

    int   classify_errors;          /* # times instance misclassified over N trials */
    int   trained;                  /* Trained flag */
    int   offset;                   /* Offset of instance (within subunit type) */
} instance_type;




/*
 * SUBUNIT TYPE
 *
 * An identifier plus two parallel int arrays of SUBUNIT_LENGTH_MAX entries.
 * NOTE(review): presumably value[i] and class[i] describe the same position
 * of the subunit -- confirm against the subunit reader.
 * ID_LENGTH_MAX and SUBUNIT_LENGTH_MAX must be defined before this header.
 */
typedef struct
{
    char  id[ID_LENGTH_MAX];
    int   value[SUBUNIT_LENGTH_MAX];
    int   class[SUBUNIT_LENGTH_MAX];
} subunit_type;



				/* HASH TABLE NODES */

/*
 * Singly linked node: a symbol string, an int value and a pointer to the
 * next node.  Usable both as `struct node_entry` and as `node`.
 */
typedef struct node_entry
{
    char               symbol[ID_LENGTH_MAX];
    int                value;
    struct node_entry *next;
} node;





/*
 * Four int counters named p, n, o and u (declared in that order).
 * NOTE(review): presumably related to the PCOUNT/NCOUNT/OCOUNT/UCOUNT
 * output flags -- confirm against the output code.
 */
typedef struct
{
    int p;
    int n;
    int o;
    int u;
} output_type;




/*
 * A float range (min, max) plus two int fields, discretes and enable.
 * NOTE(review): presumably the settings for discretizing one continuous
 * feature -- confirm against the configuration reader.
 */
typedef struct
{
    float min;
    float max;
    int   discretes;
    int   enable;
} cont_type;




				/* CONFIGURATION INFORMATION TYPE */
/*
 * Run configuration record.  Field order is part of the layout and is kept
 * exactly as declared.  CLASSES_MAX, ID_LENGTH_MAX and FEATURES_MAX must be
 * defined before this header is included.
 */
typedef struct
{
    int       operating_mode;       /* presumably TEST or CLASSIFY -- confirm */
    char      data_file[50];
    int       data_format;          /* presumably STANDARD or SUBUNITS -- confirm */
    int       classes;
    /*
     * NOTE(review): declared as int, while every other identifier buffer in
     * this header (id, symbol) is char.  Looks like it should hold class
     * name strings -- confirm against its users before changing the type.
     */
    int       class_name[CLASSES_MAX][ID_LENGTH_MAX];
    int       features;
    float     feature_weights[FEATURES_MAX];
    int       nvalues[FEATURES_MAX];
    int       value_spacing;
    int       values;
    int       common_values;

    cont_type cont[FEATURES_MAX];
    int       training_mode;
    int       instances;
    int       training_instances;
    int       test_instances;

    float     training_size;
    int       post_processing;
    int       smooth_window;
    int       threshold[CLASSES_MAX];
    int       precedence[CLASSES_MAX];
    int       nearest_neighbor;
    int       nearest_voting;
    int       exemplar_weighting;
    int       feature_weighting;
    float     K;
    float     R;
    int       genetic_count;
    float     genetic_adj;
    int       trials;
    int       output_mode;
    int       debug;

} config_type;





/* FUNCTION PROTOTYPES */

/*
 * Post processing, nearest-neighbor search and voting, training/testing and
 * configuration handling.  The definitions live outside this header.
 */
void protein_post_process(void);
int nearest_majority_vote(int k, int nearest[]);
int nearest_weighted_distance_vote(int k, int nearest[], float distances[]);
void update_nearest_list(int i, float dist, int k, int nearest[], float distances[]);
void nearest_neighbor(int example, int k, int nearest_list[], float distances[], int weighting);
void train_instance(int);
void leave_one_out(void);
/*
 * Declared with an explicit (void) list like its siblings: before C23 an
 * empty () list is not a prototype, so calls passing arguments would go
 * unchecked.
 * NOTE(review): assumes the definition takes no parameters -- confirm.
 */
void train_subset(void);
void train_specified_group(void);
void train(void);
void test(void);
void initialize_data(void);
void initialize_configuration(void);
void print_configuration(void);
void check_configuration(void);
int  constant_translate(char *);
void process_configuration_entry(char line[]);
void read_configuration_file(char filename[]);
void initialize(char filename[]);

/* Training initialization and distance tables (defined outside this header). */
void  print_count(void);
void  initialize_training(void);
void  print_distance_tables(void);
float dtable_entry(int f, int v1, int v2);
float MVDM(int x, int y, int weighting);
void  build_distance_tables(void);

/* Feature and exemplar weighting (defined outside this header). */
void  set_feature_weights(int shape);
float W(int i);
void  print_exemplar_weights(void);
void  exemplar_weights_used_correct(void);
void  exemplar_weights_one_pass(void);
void  set_exemplar_weights(void);

/* Hashing, class table and symbol table (defined outside this header). */
int  hash(char symbol[]);
void initialize_classtab(void);
int  classtab_lookup(char filename[]);
void classtab_insert(char class_name[], int value);
void initialize_symtab(void);
int  symtab_lookup(int feature, char symbol[]);
void symtab_insert(int feature_index, char symbol[], int value);

/* Data readers (defined outside this header). */
void standard_reader(void);
void subunit_to_instances(subunit_type *subunit,
                          int length,
                          int training);
void subunit_reader(void);

/*
 * General utilities (defined outside this header).
 * NOTE(review): round() shares its name with the C99 <math.h> function
 * `double round(double)`; the two declarations conflict if <math.h> is
 * included in the same translation unit.
 */
float f_random(float max);
int   i_random(int max);
void  DEBUG(int i);
int   round(float x);
float corr_coeff(double p, double n, double u, double o);
void  print_instances(void);
void  shuffle(int arr[], int tinst);
void  error(int code, char *string);

/* Output bookkeeping and reporting (defined outside this header). */
void  initialize_output(void);
void  compute_output_totals(void);
float get_average(int class, int count);
void  print_averages(void);
void  print_output(void);
void  print_classification_results(void);
void  update_single_output(int instance,
                           int nearest_class,
                           int trial);
void  update_output(int trial);





