)
// tag-close-table[#"TEXT"](tag, text, blank);
end block;
end method tag-close;
// Tag start defines the appropriate action to take at the beginning of an
// environment (i.e. when encountering "". This may be a null action,
// or may call "break-up" to dump the accumulated text, or may perform any
// other arbitrary action.
define constant tag-start-table :: = make();
define method tag-start(New-Tag :: , Old-Tag :: ,
Out-Text :: , Current-Text :: ,
File :: , blank :: )
=> (New-Text :: , blank :: );
let fun = block ()
tag-start-table[New-Tag];
// exception ()
// signal("Unknown tag type: <%=>\n", New-Tag);
// tag-start-table[#"TEXT"];
end block;
fun(New-Tag, Old-Tag, Out-Text, Current-Text, File, Blank);
end method tag-start;
// This routine is called at "load time" to build the tag action tables. Note
// that "reasonable" defaults are defined for all actions so that only the
// "specialized" actions for any given environment need be specified.
define method add-tag(tags :: ,
#key add-text: AT = identity,
break-up: BU = break-up-table[#"TEXT"],
tag-close: TC = tag-close-table[#"TEXT"],
tag-start: TS = tag-start-table[#"TEXT"])
for (tag in tags)
let Tag-Symbol = as(, tag);
add-text-table[Tag-Symbol] := AT;
break-up-table[Tag-Symbol] := BU;
tag-close-table[Tag-Symbol] := TC;
tag-start-table[Tag-Symbol] := TS;
end for;
end method add-tag;
////////////////////////////////////////////////////////////////////////
// Main Driver Routines //
////////////////////////////////////////////////////////////////////////
// This is the workhorse routines. It reads in new data, searches for tags,
// and dispatches the appropriate "add-text", "tag-start", and "tag-close"
// routines. It also attempts to unwind gracefully when it encounters the end
// of the file, since many HTML data files fail to terminate all environments.
define method process-HTML(Tag :: , Out-Text :: ,
Current-Text :: , File :: ,
blank :: )
=> (Current-Text :: , blank :: );
local method is-space(ch) ch == ' ' | ch == '\t' end method;
local method tag-end(ch) ch == ' ' | ch == '\t' | ch == '>' end method;
local method not-space(ch) ch ~= ' ' & ch ~= '\t' end method;
block (return)
while (#t)
// keep crunching until EOF causes us to call "return"
let Start-Tag = sfind(Current-Text, curry(\==, '<'));
if (Start-Tag)
// There is a tag on this line, so we accumulate the text which
// precedes it and then invoke the appropriate tag actions.
Out-Text := add-text(Tag, Out-Text,
subsequence(Current-Text, end: Start-Tag));
// If a newline occurs within a tag, we must keep reading until we get
// the rest of the tag. Whitespace is simply used as a separator, so
// we substitute a space for the newline.
let End-Tag =
for (index = sfind(Current-Text, curry(\==, '>'), start: Start-Tag)
then sfind(Current-Text, curry(\==, '>'), start: Start-Tag),
until: index)
Current-Text := concatenate(Current-Text, " ", read-line(File));
finally index;
end for;
// Find the complete tag and figure out whether it is "opening" or
// "closing" an environment.
let first = sfind(Current-Text, not-space, start: Start-Tag + 1);
let Is-Close = Current-Text[first] = '/';
if (Is-Close)
first := sfind(Current-Text, not-space, start: first + 1)
end if;
let New-Tag =
as(, copy-sequence(Current-Text, start: first,
end: sfind(Current-Text, tag-end,
start: first)));
// Call the appropriate action for the tag. This may invoke
// a recursive call to "process-HTML" for start tags and will exit
// this recusive call for closing tags.
Current-Text := copy-sequence(Current-Text, start: End-Tag + 1);
if (Is-Close)
return(Current-Text, tag-close(Tag, New-Tag, Out-Text, blank));
else
let (New-Text, NewBlank) =
tag-start(New-Tag, Tag, Out-Text, Current-Text, File, blank);
Current-Text := New-Text;
blank := NewBlank;
end if;
else
// Process newlines. We ignore indentation in the next line unless we
// are inside a "" environment.
Out-Text := add-eol(add-text(Tag, Out-Text, Current-Text));
let (New-Text, newline?) = read-line(File);
let First-Real = if (Pre-Count = 0)
sfind(New-Text, not-space, failure: 0);
else 0
end if;
Current-Text := if (First-Real > 0)
copy-sequence(New-Text, start: First-Real);
else
New-Text;
end if;
end if;
end while;
exception ()
// End of file processing. Dump accumulated text and then exit.
let blank = break-up(Tag, Out-Text, blank, #f);
values("", blank);
end block
end method process-HTML;
// specialized routines to open various sourts of streams and invoke
// "process-HTML".
define method html2text(fd :: ) => ();
grab-lock(html-status-lock);
while (html-status ~= #"idle")
status-variable.value := #"aborting";
html-status := #"aborting";
wait-for-event(html-restart-event, html-status-lock);
grab-lock(html-status-lock);
// Just in case someone else stepped in ahead of us.
end while;
status-variable.value := #"active";
html-status := #"active";
release-lock(html-status-lock);
delete(text-window, "1.0", end: "end");
*linelen* := length-variable.value;
configure(text-window, width: *linelen*);
*margin* := margin-variable.value;
pre-count := 0;
prefix := "";
block ()
process-HTML(#"TEXT", make(), "", fd, #t);
exception ()
#f;
cleanup
force-output(*window-stream*);
end block;
grab-lock(html-status-lock);
broadcast-event(html-restart-event);
status-variable.value := #"idle";
html-status := #"idle";
release-lock(html-status-lock);
end method html2text;
define method html2text(file :: ) => ();
let stream = make(, locator: file);
html2text(stream);
close(stream);
end method html2text;
define method html2text(file == #t) => ();
html2text(make(, fd: 0));
end method html2text;
////////////////////////////////////////////////////////////////////////
// Specific Environment Routines //
////////////////////////////////////////////////////////////////////////
// The anonymous methods here implement the appropriate tag actions for all of
// the tags currently supported. Some are quite straightforward, while others
// may require a twisted mind to "properly appreciate" them. This
// organization does, at least, allow the processing of most tags to be
// isolated so that you needn't grok all the code at once.
add-tag(#["TEXT"], // Default environment
// Performs a "paragraph break" and recursively processes the new
// environment
tag-start: method (New-Tag :: , Old-Tag :: ,
Out-Text :: , Current-Text :: ,
File :: , blank :: )
=> (result :: , blank :: );
let blank = break-up(Old-Tag, Out-Text, blank, #t);
process-HTML(New-Tag, Out-Text, Current-Text,
File, blank);
end method,
// Performs a "paragraph break" and returns to the enclosing
// environment
tag-close: method (tag :: , text :: ,
blank :: ) => (result :: );
break-up(tag, text, blank, #t);
end method,
// Breaks "text" into lines according to *margin* and *linelen*.
// Parameters blank and want-blank say whether there is a blank line
// before the current text and whether there should be one after the
// current text. The return value tells whether a blank line was
// printed.
break-up: method (text :: , blank :: ,
want-blank :: ) => (result :: );
let first = sfind(text, curry(\~=, ' '));
if (~first)
if (want-blank & ~blank) write-string("\n") end if;
blank | want-blank
else
let Text-Size = size(text);
let Find-Break =
method (first, last)
if (last >= Text-Size)
Text-Size;
else
let find = rsfind(text, curry(\=, ' '),
start: first, end: last);
if (find)
rsfind(text, curry(\~=, ' '),
start: first, end: find) + 1
else
sfind(text, curry(\=, ' '), start: first)
| size(text)
end if
end if
end method;
while (first)
let last = Find-Break(first,
first + *linelen* - *margin*);
print-with-prefix(text, start: first, end: last);
first := sfind(text, curry(\~=, ' '), start: last + 1)
end while;
if (want-blank) write-string("\n") end if;
want-blank
end if
end method);
// This tag action is used for many different tags -- it simply invokes
// "process-HTML" recursively without doing anything special to the
// accumulated text. This is handy for "lightweight" enviromentents like
// "".
define constant tag-start-recurse =
method (New-Tag :: , Old-Tag :: ,
Out-Text :: , Current-Text :: ,
File :: , blank :: )
=> (result :: , blank :: );
process-HTML(New-Tag, Out-Text, Current-Text, File, blank);
end method;
// This tag action is a logical partner for "tag-start-recurse". It simply
// exits so that control will return to an inclosing "process-HTML" call
// without distrubing the accumulated text.
define constant tag-close-nothing =
method (tag :: , Out-Text :: , blank :: )
blank;
end method;
// Specialized "add-text" methods provide EMPHASIZED versions of "" or
// "" style environments.
add-tag(#["I", "EM", "CITE", "VAR", "DFN"],
add-text: method(text :: ) => (result :: );
if (*Icap*.value) as-uppercase(text) else text end
end method,
tag-start: tag-start-recurse,
tag-close: tag-close-nothing);
add-tag(#["B", "STRONG"],
add-text: method(text :: ) => (result :: );
if (*Bcap*.value) as-uppercase(text) else text end
end method,
tag-start: tag-start-recurse,
tag-close: tag-close-nothing);
// Anchors do nothing at all.
add-tag(#["A", "HEAD", "BODY", "UNKNOWN", "TT", "CODE", "SAMP", "KBD"],
tag-start: tag-start-recurse,
tag-close: tag-close-nothing);
// Titles are eliminated entirely -- add-text simply "adds" an empty string.
add-tag(#["TITLE"],
add-text: method(text :: ) => (res :: ); "" end method,
tag-start: tag-start-recurse,
tag-close: tag-close-nothing);
// For un-bracketed environments like "", "
", etc. we must make sure
// "tag-start" does not start a recursive call to "process-HTML". We may or
// may not want to dump accumulated text.
add-tag(#["!"],
tag-start: method (New-Tag :: , Old-Tag :: ,
Out-Text :: , Current-Text :: ,
File :: , blank :: )
=> (result :: , blank :: );
values(Current-Text, blank);
end method);
add-tag(#["P"],
tag-start: method (New-Tag :: , Old-Tag :: ,
Out-Text :: , Current-Text :: ,
File :: , blank :: )
=> (result :: , blank :: );
values(Current-Text,
break-up(Old-Tag, Out-Text, blank, #t));
end method);
add-tag(#["BR"],
tag-start: method (New-Tag :: , Old-Tag :: ,
Out-Text :: , Current-Text :: ,
File :: , blank :: )
=> (result :: , blank :: );
if (Pre-Count > 0)
add-eol(Out-Text);
values(Current-Text, blank);
else
values(Current-Text,
break-up(Old-Tag, Out-Text, blank, #f));
end if;
end method);
add-tag(#["HR"],
tag-start: method (New-Tag :: , Old-Tag :: ,
Out-Text :: , Current-Text :: ,
File :: , blank :: )
=> (result :: , blank :: );
break-up(Old-Tag, Out-Text, blank, #t);
force-output(*window-stream*);
let start-index = end-mark.value;
write-line(*window-stream*,
concatenate('-' * *linelen*, "\n"));
force-output(*window-stream*);
add-tag(bold-tag, start: start-index,
end: text-at(start-index.line,
start-index.character + *linelen*));
values(Current-Text, #t);
end method);
add-tag(#["IMG"],
tag-start: method (New-Tag :: , Old-Tag :: ,
Out-Text :: , Current-Text :: ,
File :: , blank :: )
=> (result :: , blank :: );
break-up(Old-Tag, Out-Text, blank, #t);
write-line(*window-stream*,
concatenate(' ' * (*margin* + 4),
"*** INLINE IMAGE IGNORED ***\n"));
values(Current-Text, #t);
end method);
// Preformatted text is tricky. First we dump accumulated text. Then we
// increment the global variable "Pre-Count" which enables magic behavior in
// several standard routines. Finally, when the environment is closed, we
// split the output around the newlines and do line-by-line output so that the
// left margin will be observed.
add-tag(#["PRE"],
break-up: method (text :: , blank :: ,
want-blank :: ) => (result :: );
unless(blank) write-element(*window-stream*, '\n'); end;
let first = sfind(text, curry(\~=, '\n'));
let last = rsfind(text,
complement(rcurry(member?, "\n ")));
if (last)
while (first < last)
let endline = sfind(text, curry(\=, '\n'),
start: first, failure: last + 1);
print-with-prefix(text, start: first, end: endline);
first := endline + 1;
end while;
end if;
write-string("\n");
#t
end method,
tag-start: method (New-Tag :: , Old-Tag :: ,
Out-Text :: , Current-Text :: ,
File :: , blank :: )
=> (result :: , blank :: );
let blank = break-up(Old-Tag, Out-Text, blank, #t);
block ()
Pre-Count := Pre-Count + 1;
process-HTML(New-Tag, Out-Text, Current-Text,
File, blank);
cleanup
Pre-Count := Pre-Count - 1;
end block;
end method);
// Since the following methods add nested indentation levels, we create a
// stack for the margins. A "document state" record might be cleaner, but is
// probably overkill for this particular application.
define constant margins :: = make();
add-tag(#["UL", "OL", "MENU", "DL", "BLOCKQUOTE"],
tag-start: method (New-Tag :: , Old-Tag :: ,
Out-Text :: , Current-Text :: ,
File :: , blank :: )
=> (result :: , blank :: );
break-up(Old-Tag, Out-Text, blank, #t);
let OldCounter = counter;
block ()
push(margins, *margin*);
*margin* := *margin* + 4;
counter := 0;
process-HTML(New-Tag, Out-Text, Current-Text,
File, blank);
cleanup
*margin* := pop(margins);
counter := OldCounter;
end block;
end method);
// The "" tag causes bullets or numbers to be printed before the
// immediately following text. We use a global "prefix" variable to magically
// change the behavior of the next call to "print-with-prefix". The precise
// choice of prefix depends upon the enclosing environment.
add-tag(#["LI"],
tag-start: method (New-Tag :: , Old-Tag :: ,
Out-Text :: , Current-Text :: ,
File :: , blank :: )
=> (result :: , blank ::