;;;; -*- Scheme -*-
;;;; $Id: string-extns.scm,v 1.22 1992/01/26 13:33:45 bevan Exp $

;;;+file-overview
;;;
;;; Various misc. functions that operate on strings e.g. changing the
;;; case, finding substrings, padding with other strings ... etc.
;;;
;;; Ideas + interfaces come from various languages like :- APL,
;;; CommonLisp, Icon, Python and Perl.
;;;
;;; The definitions here are written for portability rather than for
;;; speed.  If you really need fast versions, I suggest you re-code in
;;; a low level language like C.
;;;
;;;+copyright
;;;
;;; Copyright (c) 1991 Department of Computer Science, University of Manchester
;;; All rights reserved.
;;;
;;;-copyright
;;;+author
;;;
;;; Stephen J. Bevan <bevan@cs.man.ac.uk>
;;;
;;;-author
;;;+usage
;;;
;;; To make sure all the functions work correctly on your system, load
;;; this file in and then type :-
;;;
;;; > (string:test:all)
;;;
;;; or if you have the char-set module available
;;;
;;; > (string:test:all 'someDummyArgument)
;;;
;;; If everything is ok, this will display a message indicating all
;;; tests were passed.
;;;
;;; Passes all tests on :- scm2d, elk-1.3
;;; Fails tests on :- T 3.1, UMB 2.2 (both have a different idea of
;;;                                   what the `end' parameter for
;;;                                   substring should represent)
;;;
;;; Some of the functions here find the positions of
;;; strings/characters in other strings.  They attempt to find all of
;;; the possible positions (i.e. like the Icon functions they are
;;; based on) by using continuations.  At the time of writing there is
;;; no standard for functions that return multiple values.  This means
;;; that to be portable, these find functions must take the arguments
;;; which indicate what to do next i.e. the continuation.  For those
;;; happy with continuations, this is no problem, but for those that
;;; are not, this can look very confusing.  If your Scheme has
;;; multiple value returns, I suggest you recode the find functions
;;; using them.  I will do this and include both versions as soon as I
;;; can get my hands on a Scheme with multiple value returns.
;;;
;;; If you've looked at the find functions, you might also be
;;; wondering why they take a `result' parameter.  This has been added
;;; so that instead of having to update positions as side effects
;;; between each continuation of the find functions, you can pass the result
;;; along.  Only one parameter is necessary as if you have any more,
;;; you can always bundle them up into a structure of some sort.
;;;
;;; Take a look at the string:splitByXXX functions for a good
;;; example of how the result parameter can be very useful.
;;;
;;; If you have _anything_ to say about the code, please mail me at
;;; the above address.  I welcome comments and criticisms alike.
;;; Some example of what you may or may not like are the :-
;;;   * general Scheme style e.g. `let' loops rather than using `do'
;;;   * indentation style
;;;   * choice of variable/function names.
;;;   * length of variable/function names.
;;;
;;;-usage
;;;+extensions
;;;
;;; provide/require
;;;   a la old CommonLisp.  Just remove the line if you Scheme doesn't use
;;;   provide/require
;;;
;;; error
;;;   Most Schemes have one of these, if your doesn't then just
;;;   uncomment the one below
;;;
;;;-extensions

;;; A few of the functions defined here operate on char-sets, (Icon like
;;; csets), if you haven't got these, then just comment out the
;;; following require.
;(require 'char-set)


;;; If your system doesn't have "error", then try the following.  It should
;;; work ok for any errors produced in this file.  Note to stop
;;; processing in a portable way, it goes into an infinite loop!
;(define error
;  (lambda (function-error-occurs-in error-message-to-print . any-arguments)
;    (display "ERROR : ")
;    (display function-error-occurs-in)
;    (display " ")
;    (display error-message-to-print)
;    (for-each
;     (lambda (argument)
;       (display " ")
;       (display argument))
;     any-arguments)
;    (newline)
;    (let loop ()			; infinite loop
;      (loop))))			; to stop processing continuing.


;;;+fs
;;; Produce a copy of the string `string' such that for
;;; every word in the copy, the first character of the word, if
;;; case-modifiable, is uppercase and any other case modifiable
;;; characters in the word are lowercase.  A word is defined here as a
;;; string consisting of alphanumeric characters or digits, delimited
;;; at each end either by a non-alphanumeric character or by and end
;;; of string.  Optional start and end positions can also be given.
;;; If these are not defined, then 0 and (string-length
;;; `string') are used. 
;;; Based on Common-lisp string-capitalize
;;;- +fe
;;; > (string:capitalize "occlUDed cASEmenTs")
;;; "Occluded Casements"
;;; > (string:capitalize "occlUDed cASEmenTs" 2)
;;; "ocCluded Casements"
;;; > (string:capitalize "occlUDed cASEmenTs" 2 11)
;;; "ocCluded CaSEmenTs"
;;;-
;;; Internally this isn't pure i.e. it uses a xxx! function.  It could
;;; be made pure by using append, but this is a lot slower.
(define (string:capitalize string . optionals)
  (let* ((start (if (null? optionals) 0 (car optionals)))
	 (end (if (and (not (null? optionals)) (> (length optionals) 1))
		  (cadr optionals)
		  (string-length string)))
	 (capitalized-string (string:copy string)))
    (let loop ((capitalize-char? #t) (current-pos start))
      (if (= current-pos end)
	  capitalized-string
	  (let* ((previous-char (string-ref string current-pos))
		 (new-char (if capitalize-char?
			      (char-upcase previous-char)
			      (char-downcase previous-char))))
	    (string-set! capitalized-string current-pos new-char)
	    (loop (not (or (char-alphabetic? previous-char)
			   (char-numeric? previous-char)))
		  (+ 1 current-pos)))))))



;;;+fs
;;; Center the string `source' in a string of `size',
;;; padding on the left and right, if necessary with the `padding' string.
;;; If `source' cannot be centered exactly, it is placed left
;;; of center.  Truncation is then done at the left and right as necessary.
;;; This is based on the Icon function `center(s1, i, s2)'
;;; Note this does not do the same thing as the Icon function for the case
;;; where `size' < (string-length source).  If
;;; anybody can explain why the Icon function produces "etroit" in the
;;; third case, I'll be happy to change it.
;;;- +fe
;;; > (string:center-with-string "Detroit" 10 "+")
;;; "+Detroit++"
;;; > (string:center-with-string "Detroit" 20 "+*+")
;;; "+*++*+Detroit+*++*++"
;;; > (string:center-with-string "Detroit" 6 "+*+")
;;; "Detroi"
;;;-
(define (string:center-with-string source size padding)
  (let ((source-length (string-length source)))
    (cond
     ((> size source-length)
        (let* ((left (quotient (- size source-length) 2))
	       (right (- size (+ left source-length))))
	  ;; Split into two separate appends for UMB Scheme.
	  (string-append
	     (string:replicate-to-size padding left)
	     (string-append
	        source
		(string:replicate-to-size padding right)))))
     ((< size source-length)
        (let* ((left (quotient (- source-length size) 2))
	       (right (+ left size)))
	  (substring source left right)))
     (else
        source))))



;;;+fs
;;; Center the `source' string in a string of with the given `size',
;;; padding on the left and right, if necessary with spaces.
;;; If the `size' is less than the length of the original string, the
;;; string is truncated.
;;;- +fe
;;; > (string:center "Detroit" 10)
;;; " Detroit  "
;;; > (string:center "Detroit" 6)
;;; "Detroi"
;;;-
(define (string:center string-to-center size-to-center-in)
  (string:center-with-string string-to-center size-to-center-in " "))



;;;+fs
;;; Returns another copy of the `string-to-copy'
;;; If your Scheme has a function like this, then replace this by it.
;;;-
;;; There is more than one version below.
;;; The original one I used was version 1.  However, this doesn't seem
;;; to work on T version 3.1 (due to a bug in substring I think), so I
;;; produced the second version. 
;;; Which you chose is up to you.  If version 1 works, then I'd expect
;;; it to be quicker than version 2.  
;;;
(define (string:copy original)

  ;; version 1
;  (substring original 0 (string-length original))

  ;; version 2
;  (let* ((original-length (string-length original))
;	  (copied-string (make-string original-length)))
;    (let loop ((current-pos 0))
;      (if (= current-pos original-length)
;	  copied-string
;	  (begin
;	    (string-set! copied-string current-pos (string-ref original current-pos))
;	    (loop (+ 1 current-pos))))))

  ;; Most Schemes have a string copy built in (if the follow the
  ;; Revised Reports, then it usually called string-copy), so use that
  (string-copy original)
  )



;;;+fs
;;; This is an implementation of a function suggested by
;;; Dan Bernstein <brnstnd@kramden.acf.nyu.edu> in alt.lang.cfutures as part
;;; of his `strinf' (infinite length strings) library.   The description
;;; Dan gives is :-
;;;
;;;  int strinfdiff(sf,tf) returns 0 if sf and tf are the same, -1 if sf is
;;;  a prefix of tf, -2 if it is not a prefix but is strictly smaller
;;;  (compared in dictionary order with individual chars unsigned), 1 if tf
;;;  is a prefix of sf, and 2 if tf is smaller than sf but not a prefix.
;;;-
(define (string:diff sf tf)
  (let ((sf-length (string-length sf))
	(tf-length (string-length tf)))
    (let ((end-pos (min sf-length tf-length)))
      (let loop ((pos 0)
		 (previous-comparison 0))
	(if (= end-pos pos)
	    (cond
	      ((< sf-length tf-length)
	         (if (zero? previous-comparison) -1 previous-comparison))
	      ((> sf-length tf-length)
	         (if (zero? previous-comparison) 1 previous-comparison))
	      (else previous-comparison))
	    (loop
	      (+ 1 pos)
	      (cond
	        ((char<? (string-ref sf pos) (string-ref tf pos))
		   (case previous-comparison
		     ((0) -2)
		     ((1) 2)
		     (else previous-comparison)))
		((char>? (string-ref sf pos) (string-ref tf pos))
		   (case previous-comparison
		     ((0) 2)
		     ((-1) -2)
		     (else previous-comparison)))
		(else previous-comparison))))))))



;;;+fs
;;; Returns a copy of the `string' with all uppercase
;;; characters replaced by lowercase ones.  More precisely, each
;;; character of the result string is produced by applying the
;;; function char-upcase to the corresponding character of `string'
;;; Optional start and end positions can also be given if these are not
;;; defined then 0 and (string-length `string') are used.
;;; Based on Common-lisp string-downcase
;;;- +fe
;;; > (string:downcase "occlUDed cASEmenTs")
;;; "occluded casements"
;;; > (string:downcase "occlUDed cASEmenTs" 5)
;;; "occlUded casements"
;;; > (string:downcase "occlUDed cASEmenTs" 5 15)
;;; "occlUded casemenTs"
;;;-
;;; Internally this isn't pure i.e. it uses a xxx! function.  It could
;;; be made pure by using append, but this is a lot slower.
;;;
(define (string:downcase string . optionals)
  (let* ((start (if (null? optionals) 0 (car optionals)))
	 (end (if (and (not (null? optionals)) (> (length optionals) 1))
		  (cadr optionals)
		  (string-length string)))
	 (downcased-string (string:copy string)))
    (let loop ((string-pos start))
      (if (= string-pos end)
	  downcased-string
	  (begin
	    (string-set! downcased-string
			 string-pos
			 (char-downcase (string-ref string string-pos)))
	    (loop (+ string-pos 1)))))))



;;;+fs
;;; Searches of the `pattern' in the `string' by taking substrings
;;; and checking if they are string=?.  Unless your Scheme compiler
;;; optimizes away the substring (which I doubt), this will generate a
;;; lot of garbage and therefore be quite slow.
;;; See `string:find-string' for a full explanation of all the parameters.
;;;
;;; The worst case performance of this algorithm is :- mn -m^2 + m
;;; where m and n are the lengths of the pattern and string respectively.
;;;-
(define (string:find-by-substring
	 string
	 pattern
	 start-pos
	 end-pos
	 if-found
	 if-not-found
	 initial-result)
  (let* ((pattern-length (string-length pattern))
	 (string-end (- end-pos pattern-length)))
    (if (or (>= start-pos end-pos)
	    (< string-end 0)
	    (> end-pos (string-length string)))
	(if-not-found initial-result)
	(let loop ((string-pos start-pos)
		   (result-so-far initial-result))
	  (cond
	    ((> string-pos string-end)
	       (if-not-found result-so-far))
	    ((string=?
	        pattern
		(substring string string-pos (+ string-pos pattern-length)))
	       (let ((next-string-pos
		      (lambda (result) (loop (+ 1 string-pos) result))))
		 (if-found string-pos next-string-pos result-so-far)))
	    (else
	     (loop (+ 1 string-pos)
		   result-so-far)))))))


  
;;;+fs
;;; This uses the same basic method as string:find-by-substring,
;;; except that instead of substringing it uses a separate loop to check
;;; each of the characters individually.
;;;
;;; See `string:find-string' for a full explanation of all the parameters.
;;;
;;; The worst case performance of this algorithm is :- mn -m^2 + m
;;; where m and n are the lengths of the pattern and string respectively.
;;;-
(define (string:find-by-string-ref
	 source
	 pattern
	 start-pos
	 end-pos
	 if-found
	 if-not-found
	 initial-result)
  (let* ((pattern-length (string-length pattern))
	 (source-end (- end-pos pattern-length)))
    (if (or (<= end-pos start-pos) (< source-end 0))
	(if-not-found initial-result)
	(let start-comparision ((source-start start-pos)
			       (result-so-far initial-result))
	  (if (> source-start source-end)
	      (if-not-found result-so-far)
	      (let check-rest ((source-pos source-start) (pattern-pos 0))
		(cond
		  ((= pattern-pos pattern-length)
		     (let ((next-string-pos
			    (lambda (result)
			      (start-comparision (+ 1 source-start) result))))
		       (if-found source-start next-string-pos result-so-far)))
		  ((char=? (string-ref source source-pos)
			   (string-ref pattern pattern-pos))
		     (check-rest (+ 1 source-pos) (+ 1 pattern-pos)))
		  (else
		     (start-comparision (+ source-start 1) result-so-far)))))))))
  


;;;+fs
;;; This uses the algorithm described in :-
;;;
;;; "Fast Pattern Matching in Strings"
;;; SIAM J. Computing 6(2):323-350 1977
;;; D. E. Knuth, J. H. Morris and V. R. Pratt
;;;
;;; I actually read about the algorithm in :-
;;;
;;; "Pattern Matching in Strings"
;;; Alfred V. Aho
;;; pages 325-347 of
;;; Formal Language Theory - Perspectives and Open Problems
;;; Ronald V. Brook (editor)
;;;
;;; See `string:find-string' for a full explanation of all the parameters.
;;;
;;; This algorithm is O(m + n) where m and n are the 
;;; lengths of the pattern and string respectively
;;;-
(define (string:find-by-knuth-morris-pratt
	 source
	 pattern
	 start-pos
	 end-pos
	 if-found
	 if-not-found
	 initial-result)
  ;; Generate a vector containing the positions on which to (re)start
  ;; a search of the `pattern' string.
  ;;
  (define (generate-next-positions pattern)
    (let* ((pattern-length (string-length pattern))
	   (pattern-end (- pattern-length 1))
	   (next-positions (make-vector pattern-length)))
      (vector-set! next-positions 0 -1)
      (let loop ((pattern-pos 0) (position -1))
	(cond
	  ((= pattern-pos pattern-end)
	     next-positions)
	  ((or (= position -1)
	       (char=? (string-ref pattern pattern-pos)
		       (string-ref pattern position)))
	     (begin
	       (let ((new-pattern-pos (+ 1 pattern-pos))
		     (new-position (+ 1 position)))
		 (vector-set! next-positions new-pattern-pos new-position)
		 (loop new-pattern-pos new-position))))
	  (else
	     (loop pattern-pos (vector-ref next-positions position)))))))
  
  (let ((source-length (string-length source))
	(pattern-length (string-length pattern)))
    (let ((next-positions (if (zero? pattern-length)
			     'dummy-argument-because-not-used
			     (generate-next-positions pattern))))
      (let loop ((source-pos start-pos)
		 (pattern-pos 0)
		 (result-so-far initial-result))
	(cond
	   ((= pattern-pos pattern-length)
 	      (let* ((found-pos
		        (- source-pos pattern-length))
		     (next-string-pos
		        (if (>= source-pos end-pos)
			    if-not-found
			    (lambda (result) (loop (+ 1 found-pos) 0 result)))))
		(if-found found-pos next-string-pos result-so-far)))
	   ((>= source-pos end-pos)
	      (if-not-found result-so-far))
	   ((or (negative? pattern-pos)
		(char=? (string-ref source source-pos)
			(string-ref pattern pattern-pos)))
	      (loop (+ 1 source-pos) (+ 1 pattern-pos) result-so-far))
	   (else
	      (loop source-pos
		    (vector-ref next-positions pattern-pos)
		    result-so-far)))))))



;;;+fs
;;; This uses the algorithm described in :-
;;;
;;; "Fast String Searching Algorithm"
;;; R. S. Boyer and J. S. Moore
;;; CACM 20(10):262-272 October 1977
;;;
;;; I actually read about the algorithm in :-
;;;
;;; "Pattern Matching in Strings"
;;; Alfred V. Aho
;;; pages 325-347 of
;;; Formal Language Theory - Perspectives and Open Problems
;;; Ronald V. Brook (editor)
;;;
;;; See `string:find-string' for a full explanation of all the parameters.
;;;-
(define (string:find-by-boyer-moore
	 source
	 pattern
	 start-pos
	 end-pos
	 if-found
	 if-not-found
	 initial-result)
  (error 'string:find-by-boyer-moore "not implemented"))



;;;+fs
;;; Find all the positions at which `pattern' appears in the
;;; `source'.  It is unlikely that you will want to call this
;;; function directly.  Usually it will be hidden behind another one
;;; that supplies default values for the numerous parameters it takes.
;;; This is based on the Icon function `find(s1, s2, i, j)'
;;;
;;; `start-pos'
;;;   the position in the `source' at which to start looking.
;;; `end-pos'
;;;   the position in the `source' at which to stop looking.
;;; `if-found'
;;;   a function taking three arguments: the position at which the
;;;   `pattern' was found, a continuation and the result so far.
;;;   The function can do whatever you want with the given position.
;;;   If you want to continue on to find the next string, then apply
;;;   the continuation to the result so far (or a new result built
;;;   from this).  If you want to stop processing, just return the
;;;   result so far
;;; `if-not-found'
;;;   a function taking one argument, the result.
;;;   This is called if the `pattern' is not found in the
;;;   `source' or if any of the parameters do not have correct
;;;   ranges e.g. if `end-pos' < `start-pos'.
;;; `initial-result'
;;;   The initial result of any processing you want to do in the
;;;   `if-found'/`if-not-found' sections.
;;;-
;;; Set this to whichever find function is generally faster on your machine
;;; 
(define string:find-string string:find-by-string-ref)



;;;+fs
;;; Find the first position of `pattern' in the substring of
;;; `source' bounded by `start' and `end'. Returns the position if the
;;; `pattern' is found or #f if it is not.
;;;- +fe
;;; > (string:find-first-start-end
;;; >    string:find-by-knuth-morris-pratt
;;; >    " abc def ghi abc"
;;; >    "abc"
;;; >    0
;;; >    16)
;;; 1
;;;
;;; > (string:find-first-start-end
;;; >    string:find-by-substring
;;; >    " abc def ghi abc"
;;; >    "abc"
;;; >    2
;;; >    16)
;;; 13
;;;
;;; > (string:find-first-start-end
;;; >    string:find-by-string-ref
;;; >    " abc def ghi abc"
;;; >    "abcd"
;;; >    0
;;; >    16)
;;; #f
;;;-
(define (string:find-first-start-end search-function source pattern start end)
  (search-function 
     source
     pattern
     start
     end
     (lambda (position continuation result) position)
     (lambda (result) #f)
     'dummy-argument))



;;;+fs
;;; Find the first position of `pattern' in the substring of
;;; `source' starting at `start' and running up to the end of the `pattern'.
;;; This is the same as `string:find-first-start-end' except that the end
;;; position is calcluated for you.
;;;-
(define (string:find-first-start search-function source pattern start)
  (string:find-first-start-end
     search-function
     source
     pattern
     start
     (string-length source)))



;;;+fs
;;; Find the position of the first `pattern' in `source'
;;; Returns the position or #f if the position could not be found.
;;; This is the same as `string:find-first-start-end' except with 0 and
;;; the length of the `source' as the defaults for
;;; `start' and `end'
;;;-
(define (string:find-first search-function source pattern)
  (string:find-first-start-end
     search-function
     source 
     pattern 
     0
     (string-length source)))
   


;;;+fs
;;; Find all the positions at which `char' appears in
;;; the `source'.  It is unlikely that you will want to
;;; call this function directly.  Usually it will be hidden behind
;;; another one that supplies default values for the numerous
;;; parameters it takes. 
;;; This is a special case of the Icon functions `find(s1, s2, i, j)'
;;; and `upto(c, s, i, j)', where `char' is considered
;;; as a string containing one character for `find' or a character set
;;; containing one character for `upto'
;;;
;;; `start'
;;;   the position in the `source' at which to start looking.
;;; `end'
;;;   the position in the `source' at which to stop looking.
;;; `if-found'
;;;   a function taking three arguments: the position at which the
;;;   `char' was found, a continuation and the result so far.
;;;   The function can do whatever you want with the given position.
;;;   If you want to continue on to find the next string, then apply
;;;   the continuation to the result so far (or a new result built
;;;   from this).  If you want to stop processing, just return the
;;;   result so far
;;; `if-not-found'
;;;   a function taking one argument, the result.
;;;   This is called if the `char' is not found in the
;;;   `source' or if any of the parameters do not have correct
;;;   ranges e.g. if `end' < `start'.
;;; `initial-result'
;;;   The initial result of any processing you want to do in the
;;;   `if-found'/`if-not-found' sections.
;;;-
(define string:general-find-char
  (lambda (source char start end if-found if-not-found initial-result)
    (let loop ((pos start)
	       (result-so-far initial-result))
      (cond
         ((>= pos end)
	    (if-not-found result-so-far))
	 ((char=? char (string-ref source pos))
	    (let ((next-position
		   (lambda (result)
		     (loop (+ 1 pos) result))))
	      (if-found pos next-position result-so-far)))
	 (else
	    (loop (+ 1 pos) result-so-far))))))



;;;+fs
;;; Find the first position of `char' in the substring
;;; of `source' bounded by `start' and `end'.  Returns the position if
;;; the `char' is found or #f if it is not.
;;;- +fe
;;; > (string:find-char-start-end " abc def ghi abc" #\e 0 16)
;;; 6
;;; > (string:find-char-start-end " abc def ghi abc" #\x 0 16)
;;; #f
;;;-
(define (string:find-char-start-end source char start end)
  (string:general-find-char
     source
     char
     start
     end
     (lambda (position continuation result) position)
     (lambda (result) #f)
     'dummy-argument))



;;;+fs
;;; Find all the positions at which `chars' appear in the
;;; `source'.  Each time a character is found in the string, `if-found'
;;; is called with the three parameters: the position that the
;;; character was found at, a continuation to resume looking for the other
;;; characters and the result so far.
;;; If a character cannot be found, `if-not-found' is called with one
;;; argument, the result.
;;; `initial-result'
;;;   The initial result of any processing you want to do in the
;;;   `if-found'/`if-not-found' sections.
;;; Based on the Icon function `upto(c, s, i, j)'
;;;-
(define string:general-find-chars
  (lambda (source chars start end if-found if-not-found initial-result)
    (let loop ((current-position start)
	       (result-so-far initial-result))
      (cond
        ((>= current-position end)
	   (if-not-found result-so-far))
	((char-set:member? chars (string-ref source current-position))
	   (let ((next-position
		  (lambda (result)
		    (loop (+ 1 current-position) result))))
	     (if-found current-position next-position result-so-far)))
	(else
	   (loop (+ 1 current-position) result-so-far))))))



;;;+fs
;;; This is an implementation of the Icon function `bal(c1, c2, c3, s, i, j)'
;;; It searches the `source' for any `chars'
;;; that are balanced with respect to the `left-grouping-chars' and
;;; `right-grouping-chars'.
;;; The Icon book claims that this function is :-
;;; "... useful in applications thta involve the analysis of formulas,
;;; expressions and other strings that have balanced bracketing
;;; characters"
;;; I can't think that I've ever wanted to use it, but for
;;; completeness' sake I've included it here.
;;; The function has so many parameters that I doubt you would want to
;;; call it directly.  Just decide what arguments you really want and
;;; wrap all the rest up in a function with another name.
;;;
;;; Some of the less obvious parameters are :-
;;; `left-grouping-chars'
;;;   the characters that represent a left group, this will usually be
;;;   something like '(', '[', '{', ... etc. or any combination of
;;;   these.  Even though brackets are the most common thing, you
;;;   could use any characters at all.
;;; `right-grouping-chars'
;;;   the characters that represent the right group.  These are the
;;;   characters that will match the ones in `left-grouping-chars'
;;; `if-found'
;;;   a function taking the three arguments: the position at which one
;;;   of the `chars' has been found, a continuation which
;;;   will produce the next position, and the result so far.
;;; `if-not-found'
;;;   a function taking one argument, the result so far.
;;;- +fe
;;; The following is a simple example of how to find "+" balanced in
;;; an expression.  For other examples, see the tests at the end of
;;; the file.
;;; > (let ((str "((2*x)+3)+(5*y)"))
;;; >   (string:general-find-balanced-chars
;;; >     str
;;; >     (char-set:from-string "+")
;;; >     (char-set:from-string "(")
;;; >     (char-set:from-string ")")
;;; >     0
;;; >     (string-length str)
;;; >     (lambda (position continuation result) position)
;;; >     (lambda (result) #f)
;;; >     'dummy-argument))
;;; 9
;;;-
(define (string:general-find-balanced-chars
	 source
	 chars
	 left-grouping-chars
	 right-grouping-chars
	 start
	 end
	 if-found
	 if-not-found
	 initial-result)
  (let loop ((current-position start)
	     (nesting-level 0)
	     (result-so-far initial-result))
    (if (>= current-position end)
	(if-not-found result-so-far)
	(let ((current-char (string-ref source current-position)))
	  (cond
	    ((char-set:member? chars current-char)
	       (let ((next-position
		      (lambda (result)
			(loop (+ 1 current-position) nesting-level result))))
		 (if (zero? nesting-level)
		     (if-found current-position next-position result-so-far)
		     (next-position result-so-far))))
	    ((char-set:member? left-grouping-chars current-char)
	       (loop (+ current-position 1) (+ nesting-level 1) result-so-far))
	    ((char-set:member? right-grouping-chars current-char)
	       (loop (+ current-position 1) (- nesting-level 1) result-so-far))
	    (else
	       (loop (+ 1 current-position) nesting-level result-so-far)))))))



;;;+fs
;;; Finds the position of the first character in the substring of
;;; `source', bounded by `start' and `end', that is in `chars'
;;; Returns either the position or #f if no character could be found.
;;;-
(define (string:find-first-in-chars source chars start end)
  (string:general-find-chars
     source
     chars
     start
     end
     (lambda (position continuation result) position)
     (lambda (result) #f)
     'dummy-argumnet))
	   


;;;+fs
;;; Replaces all occurences of `from' in `source' with `to'
;;; This is an based on the Icon function `map(s1, s2, s3)'.
;;; All the arguments should be strings, and `from' and
;;; `to' should be the same length.
;;; The description of how to use this function covers 7 pages in the
;;; Icon book I have, rather than repeat it all, I've just copied most
;;; of the examples.  If you can't understand what it is doing from
;;; the examples, then either get a book on Icon or mail me and I will
;;; try to put together a coherent explanation.
;;;
;;; BTW I don't like using the name `map' for this function as
;;; intuitively (to me at least) `map' implies scanning over something
;;; applying a certain operation.  Any suggestions for a better name?
;;;- +fe
;;; > (string:map "bevan" "aeiou" "+%#@&")
;;; "b%v+n"
;;; > (string:map "bevan" "aeiou" "*****")
;;; "b*v*n"
;;; > (string:map "654321" "123456" "quotas")
;;; "satouq"
;;; > (string:map "124578" "12345678" "03:56:42")
;;; "035642"
;;; > (string:map "hxmysz" "hx:my:sz" "03:56:42")
;;; "035642"
;;; > (string:map "hx:my:sz" "hxmysz" "035642")
;;; "03:56:42"
;;; > (string:map "123321" "123" "-*|")
;;; "-*||*-"
;;;-
;;; You might notice that two vectors are used to create the mapping
;;; from one set of characters to another. This is hardly the most
;;; efficient way of doing it, but it works!  In a C implementation,
;;; you could have just one vector and use NULL to represent
;;; characters that are present in the source but not the from string.
;;; However, this function is meant to be used for all characters
;;; (including NULL) and so you can't use this.  The alternative
;;; approaches seem to be either two vectors (as I have used) or one
;;; vector either containing a tuple (defined/not-defined . character)
;;; or the integer representation of the character.  This would allow
;;; an integer out of the bounds of the character set to be used as
;;; the `not-defined' marker.  Which is the most efficient in terms of
;;; time/space depends on your implementation.  If Scheme ever has
;;; some sort of conditional evaluation (a la Common-lisp +/-
;;; features), I will stuff all the implementations in here and you
;;; will be able to chose which one is best.
;;;
(define (string:map source from to)
  (let* ((mapping-vector (make-vector 256))
	 (defined?-vector (make-vector 256 #f))
	 (from-length (string-length from))
	 (source-length (string-length source))
	 (result-string (make-string source-length)))
    
    (let loop ((from-pos 0))
      (if (not (= from-pos from-length))
	  (let ((pos (char->integer (string-ref from from-pos))))
	    (vector-set! mapping-vector pos (string-ref to from-pos))
	    (vector-set! defined?-vector pos #t)
	    (loop (+ from-pos 1)))))
    
    (let loop ((source-pos 0) (from-pos 0))
      (if (= source-pos source-length)
	  result-string
	  (let ((pos (char->integer (string-ref source source-pos))))
	    (string-set!
	       result-string
	       from-pos
	       (if (vector-ref defined?-vector pos)
		   (vector-ref mapping-vector pos)
		   (string-ref source source-pos)))
	    (loop (+ 1 source-pos) (+ 1 from-pos)))))))



;;;+fs
;;; This performs the same function as `left(s1, i, s2)' in Icon.
;;; This is just a front end for some of the other functions in this
;;; file.  It's only here for the sake of any Iconites who can't do
;;; without it.
;;;- +fe
;;; > (string:left "Detroit" 10 "+")
;;; "Detroit+++"
;;; > (string:left "Detroit" 6)
;;; "Detroi"
;;;-
(define (string:left source new-length . s2)
  (let ((padding (if (null? s2) " " (car s2)))
	(source-length (string-length source)))
    (cond
      ((> new-length source-length)
         (string:pad-right-with-string source new-length padding))
      ((< new-length source-length)
         (substring source 0 new-length))
      (else
         source))))



;;;+fs
;;; This is the same as pad-left-with-string, except that it uses a
;;; default value of a space as the padding string.
;;;- +fe
;;; > (string:pad-left "Detroit" 10)
;;; "   Detroit"
;;;-
(define (string:pad-left source new-length)
  (string:pad-left-with-string source new-length " "))



;;;+fs
;;; Extend the `source' with `padding' until its
;;; length is `new-length'
;;; Achieves the same effect as `right(s1, i, s2)' in Icon, when i >
;;; length of s1.
;;;- +fe
;;; > (string:pad-left-with-string "Detroit" 10 "+")
;;; "+++Detroit"
;;; > (string:pad-left-with-string "Detroit" 20 "+*+")
;;; "+*++*++*++*++Detroit"
;;; > (string:pad-left-with-string "Detroit" 4 "+*+")
;;; "Detroit"
;;;-
(define (string:pad-left-with-string source new-length padding)
  (let ((source-length (string-length source)))
    (if (< new-length source-length)
	source
	(string-append 
	   (string:replicate-to-size padding (- new-length source-length))
	   source))))
      


;;;+fs
;;; Extend the `source' with `padding' until its
;;; length is `new-length'
;;; Achieves the same effect as `left(s1, i, s2)' in Icon, when i >
;;; length of s1.
;;;- +fe
;;; > (string:pad-right-with-string "Detroit" 10 "+")
;;; "Detroit+++"
;;; > (string:pad-right-with-string "Detroit" 20 "+*+")
;;; "Detroit+*++*++*++*++"
;;; > (string:pad-right-with-string "Detroit" 4 "+*+")
;;; "Detroit"
;;;-
(define (string:pad-right-with-string source new-length padding)
  (let ((source-length (string-length source)))
    (if (< new-length source-length)
	source
	(string-append
	   source
	   (string:replicate-to-size padding (- new-length source-length))))))
      

;;;+fs
;;; This is the same as pad-right-with-string, except that it uses a
;;; default value of a space as the padding string.
;;;- +fe
;;; > (string:pad-right "Detroit" 10)
;;; "Detroit   "
;;;-
(define (string:pad-right source new-length)
  (string:pad-right-with-string source new-length " "))



;;;+fs
;;; Checks if the `prefix' is a prefix of the
;;; `source'.  If it is it returns #t
;;;-
;;; This is a loose translation of the following C by Karl Heuer.
;;;
;;; char *strpref(char const *s, char const *t) {
;;;    while (*t != '\0') if (*s++ != *t++) return (NULL);
;;;    return ((char *)s);
;;; }
;;;
;;; Note now that strdiff has now been implemented, this is 
;;; at worst redundant and at best can be implemented in terms of it.
;;;
(define (string:prefix? source prefix)
  (let ((prefix-length (string-length prefix))
	(source-length (string-length source)))
    (let loop ((pos 0))
      (cond
        ((= pos prefix-length)
	   #t)
	((= pos source-length)
	   (<= prefix-length source-length))
	((char=? (string-ref source pos) (string-ref prefix pos))
	   (loop (+ 1 pos)))
	(else
	   #f)))))



;;;+fs
;;; Generate a given `number-of-copies' of the `source'.
;;; Based on the Icon function repl(s, i)
;;;- +fe
;;; > (string:replicate "+*+" 3)
;;; "+*++*++*+"
;;; > (string:replicate "abc" 0)
;;; ""
;;;-
(define (string:replicate source number-of-copies)
  (let loop ((result "") (count number-of-copies))
    (if (zero? count)
	result
	(loop (string-append source result) (- count 1)))))



;;;+fs
;;; Geneate a string which is `size' chars long
;;; consisting on the given `source'
;;;- +fe
;;; > (string:replicate-to-size "abc" 10)
;;; "abcabcabca"
;;; > (string:replicate-to-size "abc" 1)
;;; "a"
;;; > (string:replicate-to-size "abc" 0)
;;; ""
;;; > (string:replicate-to-size ""    1)
;;; ""
;;;-
(define (string:replicate-to-size source size)
  (let ((source-length (string-length source)))
    (if (zero? source-length)
	""
	(let loop ((replicated-string "") (size-of-replicated-string 0))
	  (cond
	    ((= size-of-replicated-string size) 
	       replicated-string)
	    ((> size-of-replicated-string size)
	       (substring replicated-string 0 size))
	    (else
	       (loop (string-append replicated-string source)
		     (+ size-of-replicated-string source-length))))))))



;;;+fs
;;; Produces a string consisting of the chars of the `source'
;;; in reverse order.
;;; Based on the Icon function reverse(s)
;;;- +fe
;;; > (string:reverse "string")
;;; "gnirts"
;;; > (string:reverse "")
;;; ""
;;;-
;;; Internally this isn't pure i.e. it uses a xxx! function.  It could
;;; be made pure by using append, but this is a lot slower.
(define (string:reverse source)
  (let ((result (make-string (string-length source))))
    (let loop ((low 0) (high (string-length source)))
      (if (zero? high)
	  result
	  (begin
	    (let ((new-high (- high 1)))
	      (string-set! result low (string-ref source new-high))
	      (loop (+ 1 low) new-high)))))))



;;;+fs
;;; This performs the same function as `right(s1, i, s2)' in Icon.
;;; This is just a front end for some of the other functions in this
;;; file.  It's only here for the sake of any Iconites who can't do
;;; without it.
;;;- +fe
;;; > (string:right "Detroit" 10 "+")
;;; "+++Detroit"
;;; > (string:right "Detroit" 6)
;;; "etroit"
;;;-
(define (string:right source new-size . s2)
  (let ((padding (if (null? s2) " " (car s2)))
	(source-length (string-length source)))
    (cond
      ((> new-size source-length)
         (string:pad-left-with-string source new-size padding))
      ((< new-size source-length)
         (substring source (- source-length new-size) source-length))
      (else
         source))))



;;;+fs
;;; Rotate the `source' a given number of places to the left
;;; or right.  With no arguments, rotates once to the right.  A
;;; positive argument rotates to the right, a negative one to the left.
;;; Based on rotate in APL, though I think positive means left in APL.
;;;- +fe
;;; > (string:rotate "abcdef" 1)
;;; "fabcde"
;;; > (string:rotate "abcdef")
;;; "fabcde"
;;; > (string:rotate "abcdef" -2)
;;; "cdefab"
;;; > (string:rotate "abcdef" 8)
;;; "efabcd"
;;; > (string:rotate "abcdef" 0)
;;; "abcdef"
;;;-
(define (string:rotate source . optional-number-of-rotations)
  (let ((source-length (string-length source)))
    (let ((places-to-rotate-by
	   (if (null? optional-number-of-rotations)
	       1
	       (let ((distance (car optional-number-of-rotations)))
		 (if (< distance 0)
		     (abs (- source-length (abs distance)))
		     distance))))
	  (rotated-string (make-string source-length)))
      (let loop ((current-position 0))
	(if (= current-position source-length)
	    rotated-string
	    (begin
	      (string-set!
	         rotated-string
		 (remainder (+ current-position places-to-rotate-by) source-length)
		 (string-ref source current-position))
	      (loop (+ current-position 1))))))))



;;;+fs
;;; Returns a list of words in `source' that are delimited by
;;; `chars'. If the string is empty or contains only
;;; whitespace, then it returns the empty list.
;;; This is based on the split function in Python, which I believe is
;;; based on the Perl/Awk one.
;;;- +fe
;;; > (string:split-by-chars "ABCXDEFYHHI" (char-set:from-string "XY"))
;;; ("ABC" "DEF" "HHI")
;;; > (string:split-by-chars "" (char-set:from-string "XY"))
;;; ()
;;; > (string:split-by-chars "A" (char-set:from-string "XY"))
;;; ("A")
;;;-
;;; 
(define (string:split-by-chars source chars)
  (let ((source-length (string-length source)))
    (string:general-find-chars
       source
       chars
       0
       source-length
       (lambda (position continuation result)
	 (cons (substring source result position)
	       (continuation (+ position 1))))
       (lambda (result)
	 (if (= result source-length)
	     '()
	     (list (substring source result source-length))))
       0)))



;;;+fs
;;; Returns a list of words in `source' that are delimted by
;;; `separator'.  There are a number of decisions you can make
;;; about what you do with "empty" words when you write a function
;;; like this.  See the following examples for an explanation of what
;;; the defaults are here.
;;;- +fe
;;; Some examples :-
;;;
;;; A simple example.  "foo" is being used as the pattern to split on.
;;; This gives a "normal" result.
;;;
;;; > (string:split-by-string "abcfoodeffooghi" "foo")
;;; ("abc" "def" "ghi")
;;; 
;;; The following show what happens when the split pattern is at
;;; either end of the string :-
;;;
;;; > (string:split-by-string "foodeffooghi" "foo")
;;; ("def" "ghi")
;;;
;;; A different possible outcome would be assume there is an empty
;;; string at the start i.e. return :-
;;;
;;; ("" "def" "ghi")
;;;
;;; The next two examples show some possibly confusing situations you
;;; can get into with separators which overlap.
;;;
;;; In this the separator either appears twice or four times depending
;;; on how you look at overlapping patterns.
;;;
;;; > (string:split-by-string "abcfffdefffghi" "ff")
;;; ("abc" "fde" "fghi")
;;;
;;; I've arbitrarily decided on the above result (i.e. decided that it
;;; appears twice).  If you can think of a good reason to use the
;;; other approach, then drop me a note.
;;;
;;; The following shows that when a pattern isn't at the start or end,
;;; it is treated as if there is a null string there.
;;;
;;; > (string:split-by-string "abcfffdeffffghi" "ff")
;;; ("abc" "fde" "" "ghi")
;;;
;;; [I'm not sure I like the difference between patterns at the
;;;  start/end and patterns in the middle that I've described above]
;;;
;;; A simple example of what happens if the string doesn't contain any
;;; characters :-
;;;
;;; > (string:split-by-string "" "ff")
;;; ()
;;;
;;; If you search for the empty string, you get back all the
;;; characters in the string.
;;;
;;; > (string:split-by-string "abcfffdeffffghi" "")
;;; ("a" "b" "c" "f" "f" "f" "d" "e" "f" "f" "f" "f" "g" "h" "i")
;;;-
;;; Note, the choice of find function below is arbitrary.
;;;
(define (string:split-by-string source separator)
  (let ((separator-length (string-length separator))
	(source-length (string-length source)))
    (string:find-string
       source
       separator
       0
       source-length
       (lambda (position continuation result)
	 (cond
	   ((< position result)
 	      (continuation result))
	   ((zero? position)
	      (continuation (+ position separator-length)))
	   (else
	      (cons (substring source result position)
		    (continuation (+ position separator-length))))))
       (lambda (result)
	 (if (= result source-length)
	     '()
	     (list (substring source result source-length))))
       0)))


;;;+fs
;;; Returns a list of whitespace delimited words in the `source'
;;; If the string is empty or contains only whitespace, then it
;;; returns the empty list.  This is based on the split function in
;;; Python, which I believe is based on the Perl/Awk one.
;;;- +fe
;;; > (string:split-by-whitespace " abc d e f  ")
;;; ("abc" "d" "e" "f")
;;; > (string:split-by-whitespace "")
;;; ()
;;; > (string:split-by-whitespace "a")
;;; ("a")
;;;-
;;; The reason that this finds the whitespace characters explicitly is
;;; that I wrote it before I wrote general-find-chars.
;;; I haven't changed it yet, as this still has the advantage of being
;;; tail-recursive.  If you feel like dabbling, try re-writing this,
;;; using general-find-chars, keeping it tail-recursive.
;;;
(define (string:split-by-whitespace source)

  (define (skip-whitespace source pos)
    (cond
      ((zero? pos)
         pos)
      ((char-whitespace? (string-ref source (- pos 1)))
         (skip-whitespace source (- pos 1)))
      (else
         pos)))

  (define (skip-non-whitespace source pos)
    (cond
      ((zero? pos)
         pos)
      ((char-whitespace? (string-ref source (- pos 1)))
         pos)
      (else
         (skip-non-whitespace source (- pos 1)))))

  (let ((result '())
    	(source-length (string-length source)))
    (if (zero? source-length)
	result
	(let loop ((source source)
		   (start-position source-length)
		   (result result))
	  (let ((non-space-pos (skip-whitespace source start-position)))
	    (if (zero? non-space-pos)
		result
		(let* ((new-start-pos
			(skip-non-whitespace source non-space-pos))
		       (new-result
			(cons (substring source new-start-pos non-space-pos)
			      result)))
		  (if (zero? new-start-pos)
		      new-result
		      (loop source (- new-start-pos 1) new-result)))))))))



;;;+fs
;;; Trim any of the characters in `chars' from the left
;;; of the `source'.
;;;- +fe
;;; > (string:trim-left "  some characters " char-set:whitespace)
;;; "some characters "
;;; > (string:trim-left "some characters" char-set:lower-case)
;;; " characters"
;;;-
(define (string:trim-left source chars)
  (let ((source-length (string-length source)))
    (let loop ((current-position 0))
      (if (and (< current-position source-length)
	       (char-set:member? chars (string-ref source current-position)))
	  (loop (+ 1 current-position))
	  (substring source current-position source-length)))))



;;;+fs
;;; Trim any of the characters in `chars' from the right
;;; hand side of the `source'.
;;; This is based on the Icon function `trim(s, c)'
;;;- +fe
;;; > (string:trim-right "Betelgeuse   " char-set:whitespace)
;;; "Betelgeuse"
;;; > (string:trim-right "Betelgeuse" char-set:lower-case)
;;; "B"
;;;-
(define (string:trim-right source chars)
  (let ((source-length (string-length source)))
    (let loop ((current-position (- source-length 1)))
      (cond
        ((zero? current-position)
	   (if (char-set:member? chars (string-ref source 0))
	       ""
	       (substring source 0 1)))
	((char-set:member? chars (string-ref source current-position))
	   (loop (- current-position 1)))
	(else
	   (substring source 0 (+ 1 current-position)))))))



;;;+fs
;;; Strip any leading and trailing characters in `chars'
;;; from the `source'.  You could just call `trim-left' and
;;; `trim-right' to do this.  However, depending on how much effort
;;; has gone into the implementation, you might find that this is more
;;; efficient.
;;;- +fe
;;; > (string:trim-left-and-right " abc d e f  " char-set:whitespace)
;;; "abc d e f"
;;; > (string:trim-left-and-right "" char-set:whitespace)
;;; ""
;;; > (string:trim-left-and-right "abc" char-set:whitespace)
;;; "abc"
;;; > (string:trim-left-and-right "XXsomestuffXX" (char-set:from-string "X"))
;;; "somestuff"
;;;-
;;; You could implment this as a call each to trim-left and
;;; trim-right.  However, because I wrote this before these functions
;;; and because this version saves making an additional copy of the
;;; string, it does the explicit searching itself.
;;;
(define (string:trim-left-and-right source chars)
  (let ((source-length (string-length source)))
    (letrec
	((trim-left
 	    (lambda ()
	      (let loop ((current-position 0))
		(if (and (< current-position source-length)
			 (char-set:member?
			    chars
			    (string-ref source current-position)))
		    (loop (+ 1 current-position))
		    current-position))))
	 (trim-right
	    (lambda (stop-position)
	      (let loop ((current-position source-length))
		(if (and (< stop-position current-position)
			 (char-set:member?
			    chars
			    (string-ref source (- current-position 1))))
		    (loop (- current-position 1))
		    current-position)))))
      (let* ((left (trim-left))
	     (right (trim-right left)))
	(substring source left right)))))



;;;+fs
;;; Returns a copy of the `source' with all lowercase
;;; characters replaced by uppercase ones.  More precisely, each
;;; character of the result string is produced by applying the
;;; function char-downcase to the corresponding character of
;;; `source'. 
;;; Optional start and end positions can also be given if these are not
;;; defined then 0 and (string-length `source') are used.
;;; Based on Common-lisp string-upcase
;;;- +fe
;;; > (string:upcase "occlUDed cASEmenTs")
;;; "OCCLUDED CASEMENTS"
;;; > (string:upcase "occlUDed cASEmenTs" 5)
;;; "occlUDED CASEMENTS"
;;; > (string:upcase "occlUDed cASEmenTs" 5 15)
;;; "occlUDED CASEMEnTs"
;;;-
;;; Internally this isn't pure i.e. it uses a xxx! function.  It could
;;; be made pure by using append, but this is a lot slower.
;;;
(define (string:upcase source . optionals)
  (let* ((start (if (null? optionals) 0 (car optionals)))
	 (end (if (and (not (null? optionals)) (> (length optionals) 1))
		  (cadr optionals)
		  (string-length source)))
	 (upcase-string (string:copy source)))
    (let loop ((current-position start))
      (if (= current-position end)
	  upcase-string
	  (begin
	    (string-set!
	       upcase-string
	       current-position
	       (char-upcase (string-ref source current-position)))
	    (loop (+ current-position 1)))))))



;;; If your Scheme doesn't have provide/require, then comment this out
;(provide 'string-extns)

;;;+file-examples
;;; Some more examples of how to use the above functions.

;;; This is an example of how to use `general-find'
;;; This searches for the `pattern' in the `source'
;;; and prints out the position each time it is found.
;;; It only does this `number-of-matches' times.  If the
;;; `pattern' does not exist a `number-of-matches' times, then
;;; an message indicating how many matches were found is output.
;;;
;;; Try the following :-
;;;
;;; > (string:test:find "abc foo abc bar abc" "abc" 2)
;;;
;;; This should correctly find the string at two places, 0 and 8.
;;;
;;; > (string:test:find "abc foo abc bar abc" "abc" 7)
;;;
;;; This should find the string at 0, 8, 16 and then indicate that
;;; only 3 matches could be found.
;;;
(define string:test:find
  (lambda (source pattern number-of-matches)
    (string:findstring
       source
       pattern
       0
       (string-length source)
       (lambda (position continuation number-of-matches-so-far)
	 (if (< number-of-matches-so-far number-of-matches)
	     (begin
	       (display "found at ") (display position) (newline)
	       (continuation (+ 1 number-of-matches-so-far)))
	     number-of-matches-so-far))
       (lambda (total-number-of-matches)
	 (if (not (= total-number-of-matches number-of-matches))
	     (begin
	       (display "Could only find ")
	       (display total-number-of-matches)
	       (display " matches") (newline))))
       0)))


;;; Returns a list of all the positions that `pattern' can be
;;; found in the `source'.
;;; Note the list contains the positions in reverse order i.e. the
;;; last position is first.
;;;
;;; > (string:test:find-all-positions-as-list
;;; >    string:find-by-substring
;;; >    "abc foo abc bar abc"
;;; >    "abc")
;;; (16 8 0)
;;;
;;; > (string:test:find-all-positions-as-list
;;; >    string:find-by-string-ref
;;; >    "abc foo abc bar abc"
;;; >    "bob")
;;; ()
;;;
(define string:test:find-all-positions-as-list
  (lambda (search-function source pattern)
    (search-function
       source
       pattern
       0
       (string-length source)
       (lambda (position continuation positions-so-far)
	 (continuation (cons position positions-so-far)))
       (lambda (id) id)
       '())))


;;; Test for general-find-char
;;; Returns a list of all the positions that `char' can be
;;; found in the `source'.
;;; Note the list contains the positions in reverse order i.e. the
;;; last position is first.
;;;
;;; > (string:test:find-all-char-positions-as-list "abc foo abc bar abc" #\a)
;;; (16 13 8 0)
;;; > (string:test:find-all-char-positions-as-list "abc foo abc bar abc" #\x)
;;; ()
;;;
(define (string:test:find-all-char-positions-as-list source char)
  (string:general-find-char
     source
     char
     0
     (string-length source)
     (lambda (position continuation positions-so-far)
       (continuation (cons position positions-so-far)))
     (lambda (id) id)
     '()))


;;; Test for general-find-chars
;;; Returns a list of all the positions that `chars' can be
;;; found in the `source'.
;;; Note the list contains the positions in reverse order i.e. the
;;; last position is first.
;;;
;;; > (string:test:find-all-chars-positions-as-list
;;; >   "the quick brown fox jumps over a lazy dog"
;;; >   (char-set:from-string "ota"))
;;; (39 34 31 26 17 12 0)
;;;
;;; > (string:test:find-all-chars-positions-as-list
;;; >   "the quick brown fox jumps over a lazy dog"
;;; >   (char-set:from-string "ABCD"))
;;; ()
;;;
(define string:test:find-all-chars-positions-as-list
  (lambda (source chars)
    (string:general-find-chars
       source
       chars
       0
       (string-length source)
       (lambda (position continuation positions-so-far)
	 (continuation (cons position positions-so-far)))
       (lambda (id) id)
       '())))


;;;+fs
;;; Finds all the positions of the `source' at which `chars' don't exist.
;;; Based on the Icon function `many(c, s, i, j)'
;;;-
(define string:test:icon:many
  (lambda (source chars start end if-found if-not-found result)
    (string:general-find-chars
       source
       (char-set:complement chars)
       start
       end
       if-found
       if-not-found
       result)))


;;;+fs
;;; This example shows how you can turn the flow of control inside out
;;; to produce an Icon like create/@ sequence.  That is, instead of
;;; putting the processing in the `if-found' function to general-find,
;;; it is made to return its continuation, so it can be explicitly
;;; pumped from outside.  I can't think why you would ever want to do
;;; this, but ....
;;; This example attempts to find the `pattern' in the
;;; `source' 
;;;- +fe
;;; > (string:test:icon "abc foo abc bar abc" "abc")
;;;_
(define (string:test:icon source pattern)

  ;; A simple version of the Icon find function. To make it more
  ;; similar you need to make the start/end positions optional.
  ;; Note this returns a co-expression, not a simple value like the
  ;; Icon find.  This means that it is easy to write `create' (see
  ;; below), but means that you have to use the function value to
  ;; explicitly get the value from the co-expression.
  ;; Maybe macros or something like Common-lisp's multiple-value-bind
  ;; could tidy this up ... 
  (define icon:find
    (lambda (source pattern)
      (string:find-string
         source
	 pattern
	 0
	 (string-length source)
	 (lambda (position continuation number-of-activations)
	   (list continuation position number-of-activations))
	 (lambda (id) #f)
	 1)))
    

  ;; Creates a co-expression over the find function.
  ;; co-expressions are represented as a triple :-
  ;;   (continuation current-position number-of-activations)
  ;; Very simple due to the above definition for find
  ;;
  (define icon:create (lambda (expression) expression))


  ;; Given a `co-expression' this pumps it to produce the next value
  ;;
  (define icon:@
    (lambda (co-expression)
      ((car co-expression) (+ 1 (caddr co-expression)))))


  ;; Returns the ``size'' of the co-expression i.e. the number of
  ;; times it has been activated.  This also just happens to be the
  ;; number of times the substring has been found the the main string.
  ;;
  (define icon:* (lambda (co-expression) (caddr co-expression)))

  ;; Return the `value' of the co-expression i.e. the value it computes.
  ;;
  (define icon:value (lambda (co-expression) (cadr co-expression)))

  (let ((co-expression (icon:create (icon:find source pattern))))
    (display "Match ") (display (icon:* co-expression))
    (display " at ") (display (icon:value co-expression)) (newline)
    (let loop ((next (icon:@ co-expression)))
      (if next
	  (begin
	    (display "Match ") (display (icon:* next)) 
	    (display " at ") (display (icon:value next)) (newline)
	    (loop (icon:@ next)))))))


;;;+fs
;;; Run some tests on all the functions defined in this file.
;;; Returns an unspecified value if all the test pass, and prints out
;;; a message indicating this.
;;;-

(define (string:test:s-eq test-name a b)
  (if (not (string=? a b))
      (error test-name a b)))

(define (string:test:n-eq test-name a b)
  (if (not (= a b))
      (error test-name a b)))

(define (string:test:b-eq test-name a b)
  (if (not (eq? a b))
      (error test-name a b)))

(define (string:test:l-eq test-name comparison a b)
  (cond
   ((null? a) (or (null? b) (error test-name a b)))
   ((null? b) (or (null? a) (error test-name a b)))
   (else (and (comparison (car a) (car b))
	      (string:test:l-eq test-name comparison (cdr a) (cdr b))))))

(define (string:test:nl-eq test-name a b)
  (string:test:l-eq test-name = a b))

(define (string:test:sl-eq test-name a b)
  (string:test:l-eq test-name string=? a b))



(define (string:test:capitalise)

  (string:test:s-eq
     'capitalise-a
     (string:capitalize "occlUDed cASEmenTs")
     "Occluded Casements")

  (string:test:s-eq
     'capitalise-b
     (string:capitalize "occlUDed cASEmenTs" 2)
     "ocCluded Casements")

  (string:test:s-eq 
     'capitalise-c
     (string:capitalize "occlUDed cASEmenTs" 2 11)
     "ocCluded CaSEmenTs"))



(define (string:test:center)
  (string:test:s-eq 'center-a (string:center "Detroit" 10) " Detroit  ")
  (string:test:s-eq 'center-b (string:center "Detroit" 6) "Detroi"))
  


(define (string:test:center-with-string)

  (string:test:s-eq
     'center-with-string-a
     (string:center-with-string "Detroit" 10 "+")
     "+Detroit++")

  (string:test:s-eq 
     'center-with-string-b
     (string:center-with-string "Detroit" 20 "+*+")
     "+*++*+Detroit+*++*++")

  (string:test:s-eq
     'center-with-string-c
     (string:center-with-string "Detroit" 6 "+*+")
     "Detroi"))



(define (string:test:diff)
  (string:test:n-eq 'diff-a (string:diff "abc" "abc") 0)
  (string:test:n-eq 'diff-b (string:diff "abc" "axc") -2)
  (string:test:n-eq 'diff-c (string:diff "abc" "xbc") -2)
  (string:test:n-eq 'diff-d (string:diff "xyz" "ayz") 2)
  (string:test:n-eq 'diff-e (string:diff "pqr" "axr") 2)
  (string:test:n-eq 'diff-f (string:diff "pqrs" "axcs") 2)
  (string:test:n-eq 'diff-g (string:diff "ab" "abc") -1)
  (string:test:n-eq 'diff-h (string:diff "ax" "abc") 2)
  )



(define (string:test:downcase)

  (string:test:s-eq
     'downcase-a
     (string:downcase "occlUDed cASEmenTs")
     "occluded casements")

  (string:test:s-eq
     'downcase-b
     (string:downcase "occlUDed cASEmenTs" 5)
     "occlUded casements")

  (string:test:s-eq
     'downcase-c
     (string:downcase "occlUDed cASEmenTs" 5 15)
     "occlUded casemenTs"))



(define (string:test:test:find-all-positions-as-list)

  (string:test:nl-eq
     'test:find-all-positions-as-list-a
     (string:test:find-all-positions-as-list
        string:find-by-substring
	"abc foo abc bar abc"
	"abc")
     '(16 8 0))

  (string:test:nl-eq 
     'test:find-all-positions-as-list-b
     (string:test:find-all-positions-as-list
        string:find-by-substring
	"abc foo abc bar abc"
	"bob")
     '())

  (string:test:nl-eq
     'test:find-all-positions-as-list-c
     (string:test:find-all-positions-as-list
        string:find-by-string-ref
	"abc foo abc bar abc"
	"abc")
     '(16 8 0))

  (string:test:nl-eq
     'test:find-all-positions-as-list-d
     (string:test:find-all-positions-as-list
        string:find-by-string-ref
	"abc foo abc bar abc"
	"bob")
     '())

  (string:test:nl-eq 
     'test:find-all-positions-as-list-e
     (string:test:find-all-positions-as-list
        string:find-by-knuth-morris-pratt
	"abc foo abc bar abc"
	"abc")
     '(16 8 0))

  (string:test:nl-eq
     'test:find-all-positions-as-list-f
     (string:test:find-all-positions-as-list
        string:find-by-knuth-morris-pratt
	"abc foo abc bar abc"
	"bob") 
     '()))



(define (string:test:find-first-start-end)

  (string:test:n-eq 
     'find-first-start-end-a
     (string:find-first-start-end
        string:find-by-substring
	" abc def ghi abc"
	"abc"
	0
	16)
     1)

  (string:test:n-eq
     'find-first-start-end-b
     (string:find-first-start-end 
        string:find-by-substring
	" abc def ghi abc"
	"abc"
	2
	16)
     13)

  (string:test:b-eq 
     'find-first-start-end-c
     (string:find-first-start-end
        string:find-by-substring
	" abc def ghi abc"
	"abcd"
	0
	16)
     #f)

  (string:test:n-eq 
     'find-first-start-end-d
     (string:find-first-start-end
        string:find-by-string-ref
	" abc def ghi abc"
	"abc"
	0
	16)
     1)

  (string:test:n-eq
     'find-first-start-end-e
     (string:find-first-start-end 
        string:find-by-string-ref
	" abc def ghi abc"
	"abc"
	2
	16)
     13)

  (string:test:b-eq 
     'find-first-start-end-f
     (string:find-first-start-end
        string:find-by-string-ref
	" abc def ghi abc"
	"abcd"
	0
	16)
     #f)

  (string:test:n-eq 
     'find-first-start-end-g
     (string:find-first-start-end
        string:find-by-knuth-morris-pratt
	" abc def ghi abc"
	"abc"
	0
	16)
     1)

  (string:test:n-eq
     'find-first-start-end-h
     (string:find-first-start-end 
        string:find-by-knuth-morris-pratt
	" abc def ghi abc"
	"abc"
	2
	16)
     13)

  (string:test:b-eq 
     'find-first-start-end-i
     (string:find-first-start-end
        string:find-by-knuth-morris-pratt
	" abc def ghi abc"
	"abcd"
	0
	16)
     #f))



(define (string:test:test:find-all-char-positions-as-list)

  (string:test:nl-eq 
     'test:find-all-char-positions-as-list-a
     (string:test:find-all-char-positions-as-list "abc foo abc bar abc" #\a)
     '(16 13 8 0))

  (string:test:nl-eq
     'test:find-all-char-positions-as-list-b
     (string:test:find-all-char-positions-as-list "abc foo abc bar abc" #\x)
     '()))



(define (string:test:find-char-start-end)

  (string:test:n-eq
     'find-char-start-end-a
     (string:find-char-start-end " abc def ghi abc" #\e 0 16)
     6)

  (string:test:b-eq
     'find-char-start-end-b
     (string:find-char-start-end " abc def ghi abc" #\x 0 16)
     #f))



(define (string:test:left)
  (string:test:s-eq 'left-a (string:left "Detroit" 10 "+") "Detroit+++")
  (string:test:s-eq 'left-b (string:left "Detroit" 6) "Detroi"))



(define (string:test:map)
  (string:test:s-eq 'map-a (string:map "bevan" "aeiou" "+%#@&") "b%v+n")
  (string:test:s-eq 'map-b (string:map "bevan" "aeiou" "*****") "b*v*n")
  (string:test:s-eq 'map-c (string:map "654321" "123456" "quotas") "satouq")
  (string:test:s-eq 'map-d (string:map "124578" "12345678" "03:56:42") "035642")
  (string:test:s-eq 'map-e (string:map "124578" "12345678" "03:56:42") "035642")
  (string:test:s-eq 'map-f (string:map "hxmysz" "hx:my:sz" "03:56:42") "035642")
  (string:test:s-eq 'map-g (string:map "hx:my:sz" "hxmysz" "035642") "03:56:42")
  (string:test:s-eq 'map-h (string:map "123321" "123" "-*|") "-*||*-"))



(define (string:test:pad-left-with-string)

  (string:test:s-eq
     'pad-left-with-string-a
     (string:pad-left-with-string "Detroit" 10 "+")
     "+++Detroit")

  (string:test:s-eq
     'pad-left-with-string-b
     (string:pad-left-with-string "Detroit" 20 "+*+")
     "+*++*++*++*++Detroit")

  (string:test:s-eq
     'pad-left-with-string-c
     (string:pad-left-with-string "Detroit" 4 "+*+")
     "Detroit"))



(define (string:test:pad-right-with-string)

  (string:test:s-eq
     'pad-right-with-string-a
     (string:pad-right-with-string "Detroit" 10 "+")
     "Detroit+++")

  (string:test:s-eq
     'pad-right-with-string-b
     (string:pad-right-with-string "Detroit" 20 "+*+")
     "Detroit+*++*++*++*++")

  (string:test:s-eq
     'pad-right-with-string-c
     (string:pad-right-with-string "Detroit" 4 "+*+")
     "Detroit"))



(define (string:test:replicate)
  (string:test:s-eq 'replicate-a (string:replicate "+*+" 3) "+*++*++*+")
  (string:test:s-eq 'replicate-b (string:replicate "abc" 0) ""))



(define (string:test:replicate-to-size)
  (string:test:s-eq 'replicate-to-size-a (string:replicate-to-size "abc" 10) "abcabcabca")
  (string:test:s-eq 'replicate-to-size-b (string:replicate-to-size "abc" 1) "a")
  (string:test:s-eq 'replicate-to-size-c (string:replicate-to-size "abc" 0) ""))



(define (string:test:reverse)
  (string:test:s-eq 'reverse-a (string:reverse "string") "gnirts")
  (string:test:s-eq 'reverse-b (string:reverse "") ""))



(define (string:test:right)
  (string:test:s-eq 'right-a (string:right "Detroit" 10 "+") "+++Detroit")
  (string:test:s-eq 'right-b (string:right "Detroit" 6) "etroit"))



(define (string:test:rotate)
  (string:test:s-eq 'rotate-a (string:rotate "abcdef" 1) "fabcde")
  (string:test:s-eq 'rotate-b (string:rotate "abcdef") "fabcde")
  (string:test:s-eq 'rotate-c (string:rotate "abcdef" -2) "cdefab")
  (string:test:s-eq 'rotate-d (string:rotate "abcdef" 8) "efabcd")
  (string:test:s-eq 'rotate-e (string:rotate "abcdef" 0) "abcdef"))



(define (string:test:split-by-string)

  (string:test:sl-eq
     'split-by-string-a
     (string:split-by-string "abcfoodeffooghi" "foo")
     '("abc" "def" "ghi"))

  (string:test:sl-eq
     'split-by-string-b
     (string:split-by-string "foodeffooghi" "foo")
     '("def" "ghi"))

  (string:test:sl-eq
     'split-by-string-c
     (string:split-by-string "abcfffdefffghi" "ff")
     '("abc" "fde" "fghi"))

  (string:test:sl-eq
     'split-by-string-d
     (string:split-by-string "abcfffdeffffghi" "ff")
     '("abc" "fde" "" "ghi"))

  (string:test:sl-eq 'split-by-string-e (string:split-by-string "" "ff") '())

  (string:test:sl-eq
     'split-by-string-f
     (string:split-by-string "abcfffdeffffghi" "")
     '("a" "b" "c" "f" "f" "f" "d" "e" "f" "f" "f" "f" "g" "h" "i")))



(define (string:test:split-by-whitespace)

  (string:test:sl-eq
     'split-by-whitespace-a
     (string:split-by-whitespace " abc d e f  ")
     '("abc" "d" "e" "f"))

  (string:test:sl-eq 'split-by-whitespace-b (string:split-by-whitespace "") '())

  (string:test:sl-eq 
     'split-by-whitespace-c
     (string:split-by-whitespace "a")
     '("a")))
  


(define (string:test:upcase)

  (string:test:s-eq
     'upcase-a
     (string:upcase "occlUDed cASEmenTs")
     "OCCLUDED CASEMENTS")

  (string:test:s-eq 
     'upcase-b
     (string:upcase "occlUDed cASEmenTs" 5)
     "occlUDED CASEMENTS")

  (string:test:s-eq
     'upcase-c
     (string:upcase "occlUDed cASEmenTs" 5 15)
     "occlUDED CASEMEnTs"))



;;; The following functions test strings in conjunction with the
;;; char-set module. 

(define (string:test:find-first-in-chars)

  (string:test:n-eq
     'find-first-in-chars
     (string:find-first-in-chars
        "the quick brown fox jumps over the lazy dog"
        (char-set:from-string "vj") 0 43)
     20)

  (string:test:n-eq
     'find-first-in-chars
     (string:find-first-in-chars
        "the quick brown fox jumps over the lazy dog"
        (char-set:from-string "vj") 22 43)
     27)

  (string:test:b-eq
     'find-first-in-chars
     (string:find-first-in-chars
        "the quick brown fox jumps over the lazy dog"
        (char-set:from-string "%*)") 22 43)
     #f))



(define (string:test:test:find-all-chars-positions-as-list)

  (string:test:nl-eq
     'test:find-all-chars-positions-as-list-a
     (string:test:find-all-chars-positions-as-list
        "the quick brown fox jumps over a lazy dog"
        (char-set:from-string "ota"))
     '(39 34 31 26 17 12 0))

  (string:test:nl-eq
     'test:find-all-chars-positions-as-list-b
     (string:test:find-all-chars-positions-as-list
        "the quick brown fox jumps over a lazy dog"
        (char-set:from-string "ABCD"))
     '()))



(define (string:test:general-find-balanced-chars)

  (string:test:n-eq
     'general-find-balanced-chars-a
     (let ((str "((2*x)+3)+(5*y)"))
       (string:general-find-balanced-chars
	  str
	  (char-set:from-string "+")
	  (char-set:from-string "(")
	  (char-set:from-string ")")
	  0
	  (string-length str)
	  (lambda (position continuation result) position)
	  (lambda (result) #f)
	  'dummy-argument))
     9)

  (string:test:n-eq
     'general-find-balanced-chars-b
     (let ((str "[+,[2,3]],[*,[5,10]]"))
       (string:general-find-balanced-chars 
	  str
	  (char-set:from-string ",")
	  (char-set:from-string "[")
	  (char-set:from-string "]")
	  0
	  (string-length str)
	  (lambda (position continuation result) position)
	  (lambda (result) #f)
	  'dummy-argument))
     9)

  (string:test:n-eq
     'general-find-balanced-chars-c
     (let ((str "([a+b))+c]"))
       (string:general-find-balanced-chars
	  str
	  (char-set:from-string "+")
	  (char-set:from-string "([")
	  (char-set:from-string "])")
	  0
	  (string-length str)
	  (lambda (position continuation result) position)
	  (lambda (result) #f)
	  'dummy-argument))
     7))



(define (string:test:split-by-chars)

  (string:test:sl-eq
     'split-by-chars-a
     (string:split-by-chars "ABCXDEFYHHI" (char-set:from-string "XY"))
     '("ABC" "DEF" "HHI"))

  (string:test:sl-eq
     'split-by-chars-b
     (string:split-by-chars "" (char-set:from-string "XY"))
     '())

  (string:test:sl-eq
     'split-by-chars-c
     (string:split-by-chars "A" (char-set:from-string "XY"))
     '("A")))



(define (string:test:trim-left)

  (string:test:s-eq 
     'trim-left-a
     (string:trim-left "  some characters " char-set:whitespace)
     "some characters ")

  (string:test:s-eq
     'trim-left-b 
     (string:trim-left "some characters" char-set:lower-case)
     " characters"))



(define (string:test:trim-right)

  (string:test:s-eq
     'trim-right-a
     (string:trim-right "Betelgeuse   " char-set:whitespace)
     "Betelgeuse")

  (string:test:s-eq
     'trim-right-b
     (string:trim-right "Betelgeuse" char-set:lower-case)
     "B"))



(define (string:test:trim-left-and-right)

  (string:test:s-eq
     'trim-left-and-right-a
     (string:trim-left-and-right " abc d e f  " char-set:whitespace)
     "abc d e f")

  (string:test:s-eq
     'trim-left-and-right-b
     (string:trim-left-and-right "" char-set:whitespace)
     "")

  (string:test:s-eq
     'trim-left-and-right-c
     (string:trim-left-and-right "abc" char-set:whitespace)
     "abc")

  (string:test:s-eq
     'trim-left-and-right-d
     (string:trim-left-and-right "XXsomestuffXX" (char-set:from-string "X"))
     "somestuff"))


;;;
;;; Test all the functions defined in this file.
;;;
(define (string:test:all . have-char-set)
  (string:test:capitalise)
  (string:test:center)
  (string:test:center-with-string)
  (string:test:diff)
  (string:test:downcase)
  (string:test:test:find-all-positions-as-list)
  (string:test:find-first-start-end)
  (string:test:test:find-all-char-positions-as-list)
  (string:test:find-char-start-end)
  (string:test:left)
  (string:test:map)
  (string:test:pad-left-with-string)
  (string:test:pad-right-with-string)
  (string:test:replicate)
  (string:test:reverse)
  (string:test:right)
  (string:test:rotate)
  (string:test:split-by-string)
  (string:test:split-by-whitespace)
  
  (if (not (null? have-char-set))
      (begin
	(string:test:find-first-in-chars)
	(string:test:test:find-all-chars-positions-as-list)
	(string:test:general-find-balanced-chars)
	(string:test:split-by-chars)
	(string:test:trim-left)
	(string:test:trim-right)
	(string:test:trim-left-and-right)))

  (display "all tests passed") (newline))

;;;-file-examples
;;; eof (just for ROK)
