function M = regexp2dfsa(reg,localf) % convert a regular expression to a DFSA % see str2dfsa.m for basic dfsa definitions %note! these aren't quite the same as matlab's regexps! %here: % () is grouping % * is Kleene star % + is disjunction % @ is empty string % NOTE: % do not confuse () and @ -- they are different objects! % thus, L(@) = {@} = the language consisting of the single empty string % but L(()) = {} = the empty language if ~exist('localf','var'), global ALF, localf = ALF; else, global ALF, ALF = localf; end %global ALF nalf = ['@' ALF]; %fprintf('%s \n',reg) if all(ismember(reg,nalf)) %it's just a string M = str2nfsa(reg,localf); else % too spoiled by matlab to write a regexp parsing routine! [chunk,rest] = peeloff(reg); if isempty(chunk) % we must have peeled off a () M = null_nfsa; else M = regexp2dfsa(chunk,localf); M = d2nfsa(M); end if ~isempty(rest) if (rest(1)=='*') M = kleene_nfsa(M); rest = rest(2:end); end if ~isempty(rest) if (rest(1)=='+') rest = rest(2:end); M = union_nfsa(M,d2nfsa(regexp2dfsa(rest,localf))); else M = concat_nfsa(M,d2nfsa(regexp2dfsa(rest,localf))); end end end end %remove unreachable states: M = prune_eps(M); M = remove_unreachable(M); % determinize M = n2dfsa(M); % minimize M = minimize_dfsa(M); return function [chunk,rest]=peeloff(str) str = [str,'()']; x=find(str=='('); if (x(1)>1) % we can peel off a char chunk = str(1); rest = str(2:end); % keep that open paren else % it starts with a paren cnt=0; t=1; done=0; while ~done if (str(t)==')') cnt=cnt-1; elseif (str(t)=='(') cnt=cnt+1; end t=t+1; done = (~cnt+(t>length(str))); end if cnt error('unmatched parentheses in regexp'); end chunk = str(2:t-2); rest = str(t:end); end rest = rest(1:end-2); return