;;; -*- Syntax: Common-lisp; Mode: LISP; Package: USER; Fonts: MEDFNT -*-

;;;; TeXIndex in Common Lisp

;; Why?  To allow fancy items in the \indexentry elements.

;; The input is an .idx file, i.e., each line contains
;; \indexentry{<key>, <subkey>}{<ref>}
;; The unix versions seem to barf when LaTeX commands appear in
;; the key or the subkey.  We will extract the real key and real
;; subkey from the morass of LaTeX commands, and do the sort with
;; them.

(defstruct (indel (:print-function (lambda (in str ignore)
				     (format str "<Entry ~A, ~A>"
					     (indel-real-key in)
					     (indel-real-subkey in))))
		  (:predicate index-element?))
  (key "")
  (subkey "")
  (ref "")
  (real-key "")
  (real-subkey ""))

(defvar *index-elements* nil)
(defvar *by-letter* nil) ;; subdivided by letter.

(defun texindex (fname path)
  ;; Read in the idx file & build index elements
  (read-idx-file (concatenate 'string path fname ".idx"))
  ;; Split them by first letter of real key, since
  ;; this is where \\indexspace's get inserted.
  (make-by-letter-index)
  ;; Now sort all the way down, by key, then by subkey, then by ref.
  (sort-all-the-way)
  ;; Now "unfold" the index into a set of \\index entries.
  (unfold-index)
  (produce-ind-file (concatenate 'string path fname ".ind")))

;;; Snarfing the .idx file

(defun read-idx-file (fname)
  (setq *index-elements* nil)
  (with-open-file (input fname :direction :input)
    (let ((marker (cons nil nil)))
      (do ((line (read-line input nil marker)
		 (read-line input nil marker))
	   (indel nil)
	   (nlines 0) (failed? 0))
	  ((eq line marker) (format t "~% ~D lines read." nlines))
	(incf nlines)
	(setq indel (make-indel-from-string line))
	(cond ((index-element? indel)
	       (push indel *index-elements*))
	      (t (incf failed?)))))))

(defun make-indel-from-string (str)
  (setq str (subseq str 12)) ;; strip off the "\indexentry{" part.
  (multiple-value-bind (key-subkey rest)
      (combine-string-until #\} str)
    ;; Extract reference by stripping off brackets
    (let ((ref (subseq rest 1 (1- (length rest))))
	  (comma? (position #\,  key-subkey :from-end t))
	  (key nil) (subkey nil))
      (cond (comma?
	     (setq key (subseq key-subkey 0 comma?)
		   subkey (subseq key-subkey (1+ comma?))))
	    (t (setq key key-subkey subkey "")))
      (make-indel :key key :subkey subkey :ref ref
		  :real-key (extract-key-from-Latex key)
		  :real-subkey (extract-key-from-Latex subkey)))))

;;; String hackery

(defun combine-string-until (echar str)
  ;; returns two values, the stuff up to the occurrance of echar
  ;; and the stuff after.  Complication: must skip over stuff inside
  ;; brackets!
  (do ((pos 0)
       (char #\  )
       (counter 0)
       (done? nil))
      (done? (values (subseq str 0 pos) (if (= pos (length str)) ""
					    (subseq str (1+ pos)))))
    (setq char (elt str pos))
    (cond ((char= char echar) ;; maybe done
	   (cond ((and (> counter 0) (char= echar #\}))
		  ;; Nope, just decrement counter
		  (decf counter))
		 (t (setq done? t))))
	  ((char= char #\}) (decf counter))
	  ((char= char #\{) (incf counter)))
    (unless done? (incf pos))))

(defun extract-key-from-latex (str &aux (result "") (start 0) char (state :cruise))
  ;; Flush all {'s and }'s.
  ;; Flush all words starting with \.
  ;; Keep everything else in order as the string.
  (dotimes (i (length str))
   (setq char (elt str i))
   (case char
     (#\\  (setq state :ignore-token)) ;; Read until end
     ((#\space  #\{  #\} )
      (case state
	(:ignore-token (setq state :cruise))
	(:cruise )
	(:snarf-token
	  (setq result (concatenate 'string
				    result 
				    (if (string= result "") "" " ")
				    (subseq str start i)))
	  (setq state :cruise))))
     (t ;; Some character that shouldn't be ignored
       (case state 
	 (:cruise (setq start i state :snarf-token))))))
  (when (eq state :snarf-token)
    (setq result (concatenate 'string
			      result (if (string= result "") "" " ")
			      (subseq str start))))
  result)

;;;; Sorting the index

;; Initial step is to divide by first letter, since space gets inserted
;; in the index.

(defun first-letter (indel) (char-upcase (elt (indel-real-key indel) 0)))

(defun make-by-letter-index (&aux entry char)
  (setq *by-letter* nil)
  (dolist (indel *index-elements*)
    (unless (string= (indel-real-key indel) "")
      ;; Lose if key is single LaTeX macro, or something bogon.
      (setq char (first-letter indel))
      (setq entry (assoc char *by-letter* :test #'char=))
      (unless entry (setq entry (cons char nil))
	      (push entry *by-letter*))
      (push indel (cdr entry))))
  (setq *by-letter* (sort *by-letter* #'(lambda (a b) (char< (car a) (car b))))))

;; Next step is to divide each by-letter bucket into those with identical keys.
;; This gets done again and again, so generalize

(defun sort-by-key (bucket extractor)
  (let ((news nil)
	(entry nil))
    (dolist (indel bucket)
      (setq entry (assoc (funcall extractor indel) news :test #'string=))
      (unless entry (setq entry (cons (funcall extractor indel) nil))
	      (push entry news))
      (push indel (cdr entry)))
    ;; Now sort on keys
    (setq news (sort news #'(lambda (e1 e2) (string< (car e1) (car e2)))))))

(defun sort-all-the-way ()
  (dolist (letter-entry *by-letter*)
    (setf (cdr letter-entry)
	  (sort-by-key (cdr letter-entry) #'indel-real-key))
    (dolist (key-entry (cdr letter-entry))
      ;; Now sort by common keys
      (setf (cdr key-entry)
	    (sort-by-key (cdr key-entry) #'indel-real-subkey))
      ;; Now sort by reference (Phew!)
      (dolist (subkey-entry (cdr key-entry))
	(setf (cdr subkey-entry)
	      (sort-by-key (cdr subkey-entry) #'indel-ref))))))

;;;; Now create the index itself by unwinding

(defvar *unfolded* nil)

(defun unfold-index (&aux str)
  (setq *unfolded* nil)
  (dolist (entry *by-letter*)
    (setq str "")
    (dolist (item (cdr entry))
      (multiple-value-bind (indels substring)
	  (unfold-item-index item)
	(setq str (format nil "~A~% \\index ~A~A~%"
			  str (indel-key (car indels))
			  substring))))
    (push str *unfolded*))
  (setq *unfolded* (nreverse *unfolded*)))

(defun unfold-item-index (subitems)
  (cond ((null (cddr subitems))
	 ;; zero or one subitem
	 (multiple-value-bind (indels substring)
	     (unfold-ref-index (cdar (cdr subitems)))
	   (values indels
		   (format nil "~A, ~A"
			   (if (string= (caadr subitems) "") ""
			       (concatenate 'string ", " ;(caadr subitems)
					    (indel-subkey (car indels))))
			   substring))))
	(t ;;multiple subitems
	 (let ((ins nil) (str ""))
	   (dolist (sube (cdr subitems))
	     (multiple-value-bind (indels substring)
		 (unfold-ref-index (cdr sube))
	       (setq ins (append indels ins))
	       (setq str (format nil "~A~%   \\subitem ~A, ~A"
				 str (if (string= (car sube) "") "definition"
					 (indel-subkey (car indels))  ;(car sube)
					  ) substring))))
	   (values ins str)))))

(defun unfold-ref-index (refs)
  (let ((indels nil) (str nil))
    (dolist (entry refs)
      (push (car entry) str)
      (push (cadr entry) indels))
    (values indels (commify-list (nreverse str)))))

;;;; Producing output

(defun produce-ind-file (fout)
  (with-open-file (out fout :direction :output)
    (produce-ind-prelude out)
    (dolist (letter-entry *unfolded*)
      (format out "~%~%  \\indexspace ~%~A" letter-entry))
    (produce-ind-closing out)))

(defun produce-ind-prelude (str)
  (format str "~%~% \\begin{theindex}"))

(defun produce-ind-closing (str)
  (format str "~%~% \\end{theindex}"))

(defun commify-list (items)
  ;; useful helper
  (cond ((null items) "")
	((cdr items)
	 (format nil "~A, ~A" (car items)
		 (commify-list (cdr items))))
	(t (format nil "~A" (car items)))))