;;; -*- Mode:Common-Lisp; Package:USER; Base:10 -*-

;;; GREP in Lisp.
;;; Portable Common Lisp, with lots of Explorer speed hacks.
;;;

;;; ChangeLog:
;;;
;;; 25 Jun 88  Rick Busdiecker  Created.
;;; 29 Jun 88  Jamie Zawinski   Optimized for the Explorer file system.
;;;                             Added the :MAX-HITS keyword.
;;; 23 Oct 88  Jamie Zawinski   Added a couple of CATCH-ERROR-RESTARTs to the inner loops of DO-WILDCARD-EXPANSION.
;;; 15 Apr 89  Jamie Zawinski 	Added Peter Norvig's optimizations: the GREP-STRING-SEARCH macro and the STRINGIFY
;;;				 local function of GREP.
;;; 30 Aug 89  Jamie Zawinski 	Added a #+TI STRIP-FONTS arg.
;;;


#-TI
(defmacro do-wildcard-expansion ((pathname-var wildcarded-pathname-or-list) &body body)
  "WILDCARDED-PATHNAME-OR-LIST is a pathname or list of pathnames.  These pathnames may have wildcards.
  PATHNAME-VAR will be bound in turn to each of the existant files represented by the pathname(s) provided.
  This is Common Lisp code."
  (let* ((files (gensym)))
    `(let* ((,files ,wildcarded-pathname-or-list))
       (if (consp ,files)
	   (setq ,files (mapcan #'directory ,files))
	   (setq ,files (directory ,files)))
       (dolist (,pathname-var ,files)
	 ,@body))))

#+TI
(defmacro do-wildcard-expansion ((pathname-var wildcarded-pathname-or-list) &body body)
  "WILDCARDED-PATHNAME-OR-LIST is a pathname or list of pathnames.  These pathnames may have wildcards.
  PATHNAME-VAR will be bound in turn to each of the existant files represented by the pathname(s) provided.
  This is Explorer specific code.
  BODY will not be executed and a warning will be printed for each expansion of the pathnames which are
  known to not be text files.  This is also faster than the CL portable code."
  (let* ((files (gensym))
	 (file (gensym))
	 (desc (gensym))
	 (rest (gensym))
	 (character-file-p (gensym)))
    `(let* ((,files ,wildcarded-pathname-or-list))
       (unless (listp ,files) (setq ,files (cons ,files nil)))
       (dolist (,file ,files)
	 (sys:catch-error-restart ((SYS:BREAK) "Give up on pattern ~A" ,file)            ; Add a resume handler for break.
	   (dolist (,desc (cdr (fs:directory-list ,file)))
	     (let* ((,pathname-var (car ,desc)))
	       (sys:catch-error-restart ((SYS:BREAK) "Give up on file ~A" ,pathname-var) ; Add a resume handler for break.
		 (let* ((,character-file-p t))
		   (do* ((,rest (cdr ,desc) (cddr ,rest)))
			((null ,rest))
		     (case (car ,rest)
		       (:CHARACTERS (setq ,character-file-p (second ,rest))
				    (unless ,character-file-p
				      (format t "~&Error: ~A is not a character file." ,pathname-var))
				    (return))
		       (:BYTE-SIZE  (unless (= 8 (or (second ,rest) 8))
				      (setq ,character-file-p nil)
				      (format t "~&Error: ~A is of byte-size ~D." ,pathname-var (second ,rest))
				      (return)))
		       (:DIRECTORY  (when (second ,rest)
				      (setq ,character-file-p nil)
				      (format t "~&Error: ~A is a directory file." ,pathname-var)
				      (return)))))
		   (when ,character-file-p
		     ,@body))))))))))


#-TI
(defmacro do-lines ((line-var file) &body body)
  "Open the file for input, binding LINE-VAR to each line in the file until EOF."
  (let* ((stream (gensym))
	 (eof (gensym)))
    `(with-open-file (,stream ,file :direction :input)
       (let* (,line-var)
	 (loop
	   (setq ,line-var (read-line ,stream nil ',eof))
	   (cond ((eq ,line-var ',eof)
		  (return))
		 (t ,@body)))))))

#+TI
(defmacro do-lines ((line-var file) &body body)
  "Open the file for input, binding LINE-VAR to each line in the file until EOF.
  This is TI specific code.  LINE-VAR will be a string with a fill-pointer.
  It is returned to a resource to be reused when BODY terminates."
  (let* ((stream (gensym))
	 (string (gensym))
	 (eof-p (gensym)))
    `(with-open-file (,stream ,file :direction :input)
       (loop
	 (multiple-value-bind (,string ,eof-p) (send ,stream :line-in nil)
	   (when ,string
	     (let* ((,line-var ,string))  ; We keep track of the string in a variable other than LINE-VAR so that BODY may
	       ,@body))                   ; change the value of LINE-VAR without making it impossible to reuse the string.
	   (sys::return-readstring ,string)
	   (when ,eof-p (return)))))))


(defmacro grep-string-search (stringa stringb case-sensitive-p)
  "A possibly more efficient version of SEARCH.  Find subsequences of STRINGB matching STRINGA."
  #-TI `(search (the string ,stringa) (the string ,stringb) :test (if ,case-sensitive-p #'string= #'string-equal))
  #+TI `(zlc:string-search (the string ,stringa) (the string ,stringb) 0 nil 0 nil ,case-sensitive-p)
  )

#+TI
(defun grep-delete-font-codes (string)
  "Destructively modifies the string to not contain any font change codes.  The string must have a fill pointer."
  (declare (type simple-string string))
  (do* ((l (length string))
	(i 0 (1+ i)))
       ((>= i l))
    (when (char= (char string i) #\Epsilon)
      (sys:copy-array-portion string (+ i 2) l string i (- l 2))
      (decf l 2)
      (setf (fill-pointer string) l)))
  string)


(defun grep (string pathname &key case-sensitive max-hits silent-p #+TI (strip-fonts t))
  " STRING is a template to match.  It may be a string, symbol, or a list.
    If it is a list, then the first level of list structure means AND, and
    the second means OR.  For example:
     ONE             matches lines containing the string ONE.
     (ONE TWO)       matches lines containing the strings ONE and TWO.
     (ONE (TWO THREE))          matches lines containing the strings ONE and TWO or ONE and THREE.
     (ONE ((TWO THREE) FOUR))   matches lines containing the strings ONE, TWO, and THREE or ONE and FOUR.
    Elements of this list may be strings or symbols.

   PATHNAME is a pathname or list of pathnames, any of which may have wildcards.
   MAX-HITS is an integer or NIL.  Once that many matches are found in a file,
    the file is not searched further.
   STRIP-FONTS is whether to ignore things which look like Explorer font change codes when comparing strings.
   Output is printed on *STANDARD-OUTPUT*, unless SILENT-P is true, in which case nothing is printed.
   The returned value is a list of all files which had hits, consed with how many hits they had."
  (declare (optimize speed (safety 0)))
  
  (check-type string (or list string symbol))
  (check-type pathname (or list pathname string symbol))
  
  (labels ((match-and (pattern string)
	     (declare (string string))
	     (if (listp pattern)
		 (dolist (sub-pattern pattern T)
		   (unless (match-or sub-pattern string)
		     (return NIL)))
		 (grep-string-search pattern string case-sensitive)))
	   
	   (match-or (pattern string)
	     (declare (string string))
	     (if (listp pattern)
		 (dolist (sub-pattern pattern NIL)
		   (when (match-and sub-pattern string)
		     (return T)))
		 (grep-string-search pattern string case-sensitive)))
	   
	   (stringify (string)
	     (typecase string
	       (STRING string)
	       (SYMBOL (symbol-name string))
	       (T      (mapcar #'stringify string)))))
    (declare (optimize speed (safety 0)))
    (let ((result '())
	  (string (stringify string)))	   ; Only check for symbols once. -norvig
      (do-wildcard-expansion (truename pathname)
	(let ((hits 0)
	      (line-number 0))
	  (block ONE-FILE
	    (do-lines (line truename)
	      (incf line-number)
	      #+TI (when strip-fonts (grep-delete-font-codes line))
	      (when (match-and string line)
		(when (zerop hits) (unless silent-p (format t "~2&In file ~A:~%" (enough-namestring truename))))
		(incf hits)
		(unless silent-p (format t "~5D: ~A~%" line-number line))
		(when (and max-hits (>= hits max-hits))
		  (return-from ONE-FILE nil)))))
	  (unless (zerop hits) (push (cons truename hits) result))))
      (nreverse result))))
