
%% This file (lexicon_compiler) contains predicates
%% used in compiling the user's morpheme list into 
%% a segmental `pronunciation tree', consisting of 
%% a set of states and segment labelled arcs, 
%% and also for using that tree to find out if some 
%% sequence of segments corresponds to a lexical
%% sequence. 


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% COMPILE_LEXICON: 
%
% Performs the lexicon compilation. 

compile_lexicon:- 
   abolish(link/4), 
   abolish(word/2),         % eliminate previous compilation records. 
   abolish(final/1), 
   zero_num(state),         % reset state counter
   set_flag(lex_compilation_count,[10,0,0]), 
   nl, write('COMPILING LEXICON'), nl, 
   assert(link(pre_initial,initial,0,0)),     
   fail_drive((morph Atom  = PH1,            % Look up a user morph definition.
               translate_segments(PH1,PH2),  % Apply translation function.
               add_to_lexicon(PH2,Atom),     % Do main work of adding morpheme.
               lex_compilation_counter(Atom) % Periodically prints progress report 
	       )),                           %   during compilation
   flag(lex_compilation_count,[_,_,Total]), 
   nl, write('Total morphemes processed = '), 
   write(Total), nl,
   index_lexicon, nl,                        % Marks each state for number of complete 
                                             % morphs that can be reached from it. 
   write('(lexicon compilation completed)'), nl.     

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Keeps a count of total morphs compiled. 
% Prints a message Periodically to indicate 
% progress of compilation. 
%
% The flag lex_compilation_count records three values:
% Interval  - the interval between printing of messages
% Current - the number of morphs compiled since last message printed
% Total - total morphs so far compiled. 

lex_compilation_counter(Atom):- 
   flag(lex_compilation_count,[Interval,Current,Total]), % flag recording counters  
   Total1 is Total + 1, 
   (Interval is Current+1                     % when "Interval" morphs compiled since
      -> (nl, write('Morphemes processed = '),    % since last message, print another,
          write(Total1),                      % and zero current counter
	  write('   (Current morpheme: '), 
	  write(Atom), write(')'), 
	  Current1 is 0)
       ; Current1 is Current + 1),      
   set_flag(lex_compilation_count,[Interval,Current1,Total1]). % store updated counters


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% ADD_TO_LEXICON: <segments><atomic_name>
%
% does main work of adding a morpheme to the 
% lexical FSM. The FSM is structured in terms of 
% the _absolute_ distinctiveness of  segments, 
% i.e. treat two segments as being the same just  
% in case one can be derived from the other by a 
% consistent relabelling of variables. 
% 
% To facilitate this, the links of the tree have a 
% fourth field which specifies a ground copy of the 
% actual segment (appearing in third field). The 
% grounding method is such that two `equivalent' 
% segments - in above sense - become identical when
% grounded. 
%
% States corresponding to end of morphs are are 
% marked for the identity of the morph (using its
% "Atom"), and are also assigned `final state' 
% status. This `final' status relevant to device that 
% interprets the tree (i.e. there is an implicit link 
% from all final states to the initial state of the 
% tree labelled with the boundary markder 0). 

% add_to_lexicon/2:

add_to_lexicon(Ph,Atom):- 
   macro_eval_list(Ph,Segs),               % Expand macros      
   ground_copy(Segs,Gsegs),                % Make ground copies
   add_to_lex_fsm(initial,Gsegs,Segs,Atom), !.  % proceed
   
add_to_lexicon(Ph,Atom):-                % Warn failure - typically due to
   nl, write('>> Morheme not added: '),  % unexpandable macro. 
   write(Atom), write(' = '), write(Ph).

% add_to_lexicon/4:

add_to_lex_fsm(State,[],[],Atom):-     % Finished recursion, 
   word(State,Atom), !,                % but morph already present. 
   nl, write('duplicate morpheme in lexicon: '), 
   write(Atom).                        % Warn. 
   
add_to_lex_fsm(State,[],[],Atom):-     % Finished recursion, morph not already
   final(State), !,                    % present. State already has `final' status. 
   assert(word(State,Atom)).           % Mark the state with atomic identity of
                                       % the morph. 
				       
add_to_lex_fsm(State,[],[],Atom):-   % Finished recursion, morph not already
   assert(final(State)),             % present. Assign `final' status to State. 
   assert(word(State,Atom)).         % Mark the state with atomic identity of
                                     % the morph. 
   
add_to_lex_fsm(State,[G|Gs],[_Seg|Segs],Atom):-  % Recursing down segment list. 
   link(State,Nextstate,_,G), !,            % Link for this segment already present.
   add_to_lex_fsm(Nextstate,Gs,Segs,Atom).  % Recurse. 
   
add_to_lex_fsm(State,[G|Gs],[Seg|Segs],Atom):- % Recursing down segment list. 
                                               % No link for next segment. 
   new_state(Newstate),                        % Create new state label. 
   assert(link(State,Newstate,Seg,G)),         % Add link. 
   add_to_lex_fsm(Newstate,Gs,Segs,Atom).      % Recurse. 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% INDEX_LEXICON
%
% Marks each state for number of complete 
% morphs that can be reached from it. 

index_lexicon:- 
  abolish(possible_words/2),   % Abolish previous record. 
  index_state(initial,_).      % Begin at the top. 

% index_state/2:

index_state(State,NewCount):- 
  index_successors(State,Count),  % Index successors to current state.
                                  % Count is the total for morphs reachable
				  % via successors. 
  count_words(State,Here),        % Add up morphs ending at this state. 
  NewCount is Count + Here,       % Sum is total reachable from here. 
  assert(possible_words(State,NewCount)).   % Record 
 

%%%%%%%%

% Index the successors of state State, and return count 
% of total morphs reachable from those successor states. 

index_successors(State,Count):- 
  set_of(Nextstate,
         S1^S2^link(State,Nextstate,S1,S2),
	 Successors),                 % Find successors to State. 
  index_state_list(Successors,Count). % Index them and return total. 
 

%%%%%%%%

% Index each of a list of states, and return count 
% of total morphs reachable from those states. 

index_state_list([],0). 
index_state_list([State|States],NewCount):- 
  index_state(State,Count), 
  index_state_list(States,RestCount), 
  NewCount is Count + RestCount. 

%%%%%%%%

% Count number of morphs ending at state State. 

count_words(State,N):- 
   set_of(W,word(State,W),Ws), 
   length(Ws,N). 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% TRAVERSE_LEX_FSM <segment_list> <state1> <state2>
% 
% succeeds if can move from state  <state1> to <state2> 
% via some sequence of links labelled with segments that 
% can unify with the segments in <segment_list> 

traverse_lex_fsm([],State,State).   
traverse_lex_fsm([X|Xs],State,Endstate):-
  transition(State,NewState,X), 
  traverse_lex_fsm(Xs,NewState,Endstate).

%%%%%%%%%%%%%%%

% Succeeds if can transition from State to NextState via 
% a link labelled with segment Seg 

transition(State,NextState,Seg):- 
   link(State,NextState,Seg,_).
   
transition(State,initial,0):-  % Special case: Can transition from any
   final(State).               % final state to the initial state if
                               % segment is the boundary marker. 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Translate a list of segments according to the `translation 
% function' on segments defined by "translate_segment" given 
% below. This Predicate is applied to the segment list of 
% morpheme definitions, and is there to simplify the task of 
% stating the lexicon. E.g. the version of translate_segment
% below which just adds a "@" to a segment specification, 
% means that we can write the segment list for hand as
% just [h,a,n,d] rather than [@h, @a, @n, @d]

translate_segments([],[]). 
translate_segments([A|As],[B|Bs]):- 
   translate_segment(A,B), 
   translate_segments(As,Bs). 

%%%%%%%%%%%%%%

% translate a single segment. 

%translate_segment(0,0):- !.   % boundary marker special case. 
translate_segment(A,(@ A)).   % add an @ to segment. 

%% Some alternative versions: 
%% translate_segment((@ A),A).     % macro -> atom name
%% translate_segment(A,A).         % identity function

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

