function M = regexp2nfsa(reg,localf) % convert a regular expression to a NFSA % see str2nfsa.m for basic nfsa definitions %note! these aren't quite the same as matlab's regexps! %here: % () is grouping % * is Kleene star % + is disjunction % @ is empty string % NOTE: % do not confuse () and @ -- they are different objects! % thus, L(@) = {@} = the language consisting of the single empty string % but L(()) = {} = the empty language if ~exist('localf','var'), global ALF, localf = ALF; else, ALF = localf; end %global ALF % alphabet determined by regexp itself %spec = '()*+@'; %ALF = setdiff(unique(reg),spec); nalf = ['@' ALF]; if all(ismember(reg,nalf)) %it's just a string M = str2nfsa(reg,localf); else % too spoiled by matlab to write a regexp parsing routine! [chunk,rest] = peeloff(reg); if isempty(chunk) % we must have peeled off a () M = null_nfsa; else M = regexp2nfsa(chunk,localf); end if ~isempty(rest) if (rest(1)=='*') M = kleene_nfsa(M); rest = rest(2:end); end if ~isempty(rest) if (rest(1)=='+') rest = rest(2:end); M = union_nfsa(M,regexp2nfsa(rest,localf)); else M = concat_nfsa(M,regexp2nfsa(rest,localf)); end end end end %remove unreachable states: M = prune_eps(M); M = remove_unreachable(M); return function [chunk,rest]=peeloff(str) str = [str,'()']; x=find(str=='('); if (x(1)>1) % we can peel off a char chunk = str(1); rest = str(2:end); % keep that open paren else % it starts with a paren cnt=0; t=1; done=0; while ~done if (str(t)==')') cnt=cnt-1; elseif (str(t)=='(') cnt=cnt+1; end t=t+1; done = (~cnt+(t>length(str))); end if cnt error('unmatched parentheses in regexp'); end chunk = str(2:t-2); rest = str(t:end); end rest = rest(1:end-2); return