; -*- Scheme -*-
;
; $Id: string12.scm,v 1.1 1993/08/28 12:21:31 bevan Exp $
;

;+doc
; procedure: substring:split-by-string
; arguments: search-method separator source start end
; signature: substring-searcher x string -> string x int x int -> list[string]
; pre:       (and (<= start end (string-length source))
;                 (<= (string-length separator) (string-length source)))
;
; Returns a list of words in SOURCE that are delimited by
; SEPARATOR using SEARCH-METHOD to find the occurrences of SEPARATOR in
; SOURCE between START (inclusive) and END (exclusive).
;
; There are a number of decisions you can make about what you do with
; "empty" words when you write a function like this.  See the
; following examples for an explanation of what the defaults are here.
;
; A simple example.  "foo" is being used as the pattern to split on.
; This gives a "normal" result.
;
; > ((substring:split-by-string substring:find-by-knuth-morris-pratt "foo")
;    "abcfoodeffooghi" 0 15)
; ("abc" "def" "ghi")
; 
; The following show what happens when the split pattern is at
; either end of the string :-
;
; > ((substring:split-by-string substring:find-by-knuth-morris-pratt "foo")
;    "foodeffooghi" 0 12)
; ("" "def" "ghi")
;
; The next two examples show some possibly confusing situations you
; can get into with separators which overlap.
;
; In this the separator either appears twice or four times depending
; on how you look at overlapping patterns.
;
; > ((substring:split-by-string substring:find-by-knuth-morris-pratt "ff")
;    "abcfffdefffghi" 0 14)
; ("abc" "fde" "fghi")
;
; I've arbitrarily decided on the above result (i.e. decided that it
; appears twice).  If you can think of a good reason to use the
; other approach, then drop me a note.
;
; The following shows that when two separators are adjacent (rather than
; at the start or end), an empty string is returned between them.
;
; > ((substring:split-by-string substring:find-by-knuth-morris-pratt "ff")
;    "abcfffdeffffghi" 0 15)
; ("abc" "fde" "" "ghi")
;
; A simple example of what happens if the string doesn't contain any
; characters :-
;
; > ((substring:split-by-string substring:find-by-knuth-morris-pratt "ff")
;    "" 0 0)
; ("")
;
; If you search for the empty string, you get back all the
; characters in the string.
;
; > ((substring:split-by-string substring:find-by-knuth-morris-pratt "")
;    "abcfffdeffffghi" 0 15)
; ("" "a" "b" "c" "f" "f" "f" "d" "e" "f" "f" "f" "f" "g" "h" "i" "")
;-doc

(define substring:split-by-string
  (lambda (string-search-method separator)
    ; Specialise the searcher on SEPARATOR once; the splitter returned
    ; below can then be applied to any number of (source start end) ranges.
    (let ((find-separator (string-search-method separator))
          (separator-length (string-length separator)))
      (lambda (s ss se)
        ; Success continuation handed to the searcher.  MATCH-START is the
        ; index at which an occurrence begins, RESUME continues the search
        ; given the start of the next word, and WORD-START is the start of
        ; the word currently being collected.  (The searcher's calling
        ; protocol is defined outside this block.)
        (define (on-match match-start resume word-start)
          (cond
            ; The occurrence begins before the current word does, so it
            ; overlaps a separator that was already consumed: ignore it.
            ((< match-start word-start)
             (resume word-start))
            ; Otherwise emit the word that ends at the occurrence and
            ; begin the next word just past the separator.
            (else
             (cons (substring s word-start match-start)
                   (resume (+ match-start separator-length))))))
        ; Failure continuation: no further occurrences, so everything
        ; from WORD-START up to SE is the final word.
        (define (on-exhausted word-start)
          (list (substring s word-start se)))
        (find-separator s ss se on-match on-exhausted ss)))))

;-------------

;+doc
; procedure: string:split-by-string
; arguments: search-method separator source
; signature: substring-searcher x string -> string -> list[string]
; pre:       (<= (string-length separator) (string-length source))
;
; Returns a list of words in SOURCE that are delimited by
; SEPARATOR using SEARCH-METHOD to find the occurrences of SEPARATOR in
; SOURCE.  There are a number of decisions you can make
; about what you do with "empty" words when you write a function
; like this.  See the following examples for an explanation of what
; the defaults are here.
;
; A simple example.  "foo" is being used as the pattern to split on.
; This gives a "normal" result.
;
; > ((string:split-by-string substring:find-by-knuth-morris-pratt "foo")
;    "abcfoodeffooghi")
; ("abc" "def" "ghi")
; 
; The following show what happens when the split pattern is at
; either end of the string :-
;
; > ((string:split-by-string substring:find-by-knuth-morris-pratt "foo")
;    "foodeffooghi")
; ("" "def" "ghi")
;
; The next two examples show some possibly confusing situations you
; can get into with separators which overlap.
;
; In this the separator either appears twice or four times depending
; on how you look at overlapping patterns.
;
; > ((string:split-by-string substring:find-by-knuth-morris-pratt "ff")
;    "abcfffdefffghi")
; ("abc" "fde" "fghi")
;
; I've arbitrarily decided on the above result (i.e. decided that it
; appears twice).  If you can think of a good reason to use the
; other approach, then drop me a note.
;
; The following shows that when two separators are adjacent (rather than
; at the start or end), an empty string is returned between them.
;
; > ((string:split-by-string substring:find-by-knuth-morris-pratt "ff")
;    "abcfffdeffffghi")
; ("abc" "fde" "" "ghi")
;
; A simple example of what happens if the string doesn't contain any
; characters :-
;
; > ((string:split-by-string substring:find-by-knuth-morris-pratt "ff")
;    "")
; ("")
;
; If you search for the empty string, you get back all the
; characters in the string.
;
; > ((string:split-by-string substring:find-by-knuth-morris-pratt "")
;    "abcfffdeffffghi")
; ("" "a" "b" "c" "f" "f" "f" "d" "e" "f" "f" "f" "f" "g" "h" "i" "")
;-doc

; Whole-string convenience wrapper: builds the range-based splitter for
; METHOD and SEP once, then applies it to the full extent of each SOURCE.
(define (string:split-by-string method sep)
  (let ((split-range (substring:split-by-string method sep)))
    (lambda (source)
      ; Cover the entire string: from index 0 up to its length.
      (split-range source 0 (string-length source)))))

; eof
