; -*- Scheme -*-
;
; $Id: string10.scm,v 1.2 1993/10/26 16:35:43 bevan Exp $
;

;+doc
; procedure: substring:find-by-knuth-morris-pratt
; arguments: pattern string start end if-found if-not-found state
; signature: substring-searcher
; pre:       (and (<= start end (string-length string))
;                 (<= (string-length pattern) (string-length string)))
;
; This uses the algorithm described in :-
;
;   "Fast Pattern Matching in Strings"
;   SIAM J. Computing 6(2):323-350 1977
;   D. E. Knuth, J. H. Morris and V. R. Pratt
;
; I actually read about the algorithm in :-
;
;   "Pattern Matching in Strings"
;   Alfred V. Aho
;   pages 325-347 of
;   Formal Language Theory - Perspectives and Open Problems
;   Ronald V. Brook (editor)
;
; See also Hackmem 35 by Bill Gosper.
;
; See SUBSTRING:FIND-STRING for a full explanation of all the parameters.
;
; This algorithm is O(m + n) where m and n are the 
; lengths of the pattern and string respectively
;
; 
;-doc

(define substring:find-by-knuth-morris-pratt
  (lambda (p)
    (let* ((pl (string-length p))
	   (nps 
	    (if (zero? pl)
		'dummy-argument-because-not-used
		(substring::kmp-generate-next-posns p pl))))
      (lambda (st s e yes no r)
	(let loop ((si s) (pi 0) (r r))
	  (cond
	   ((= pi pl)
	    (let* ((fp (- si pl))
		   (next (if (>= si e) no (lambda (r) (loop (+ 1 fp) 0 r)))))
	      (yes fp next r)))
	   ((>= si e) (no r))
	   ((or (negative? pi) (char=? (string-ref st si) (string-ref p pi)))
	    (loop (+ 1 si) (+ 1 pi) r))
	   (else (loop si (vector-ref nps pi) r))))))))

;-------------

; procedure: substring::kmp-generate-next-posns
; arguments: pattern pattern-length
; signature: string x int -> vector[int]
; pre:       (= pattern-length (string-length pattern))

(define substring::kmp-generate-next-posns
  (lambda (p len)
    (let ((pe (- len 1))
	  (nps (make-vector len)))
      (vector-set! nps 0 -1)
      (let loop ((i 0) (j -1))
	(cond
	 ((= i pe) nps)
	 ((or (= j -1) (char=? (string-ref p i) (string-ref p j)))
	  (let ((i (+ 1 i)) (j (+ 1 j))) (vector-set! nps i j) (loop i j)))
	 (else (loop i (vector-ref nps j))))))))

;------------

;+doc
; procedure: string:find-by-knuth-morris-pratt
; arguments: pattern source if-found if-not-found state
; signature: forall a => string x string x ? x ? x a -> a
; pre:       (<= (string-length pattern) (string-length source)))
;
; Equivalent to SUBSTRING:FIND-BY-SUBSTRING with a START of 0 and an
; END of (STRING-LENGTH SOURCE)
;-doc

(define string:find-by-knuth-morris-pratt
  (lambda (p s yes no st)
    ((substring:find-by-knuth-morris-pratt p) s 0 (string-length s) yes no st)))

; eof
