;;; -*- Syntax: Common-Lisp; Package: ARLOTJE; Base: 10; Mode: LISP -*-

;;.@chapter Slots which extract values
(in-package 'arlotje :use '(lisp))

(export '(read-english-number strip-parenthetical-comments string-match))

;;;; Extraction slots.

(define-unit extractions
  (member-of 'collections)
  (generalizations 'slotp)
  (members-have '(works-like prototypical-slot)))

(define-unit prototypical-string-extraction
  (works-like 'prototypical-slot)
  (to-compute-value 'string-extract))

(defun string-extract (unit slot)
  (let ((patterns (get-value slot 'pattern-string))
	(string-to-match-with (get-value unit (get-value slot 'source-slot))))
    (if (arlotje::failurep string-to-match-with) string-to-match-with
      (dolist (pattern patterns (arlotje::fail unit slot))
	(let ((match (string-match pattern string-to-match-with)))
	  (when match (funcall (get-value slot 'extraction-parser) match)))))))

(defun strip-parenthetical-comments (string)
  (let ((left (position #\( string)) (right (position #\) string)))
    (if (and right left)
	(strip-parenthetical-comments
	 (concatenate 'string
		      (subseq string 0 left)
		      (subseq string (+ right 1))))
	string)))

(defun string-match (pattern string)
  (cond ((and (symbolp (car pattern))
	      (equal (symbol-name (car pattern)) "?"))
	 (if (null (cdr pattern)) string
	     (and (search (cadr pattern) string)
		  (subseq string 0 (search (cadr pattern) string)))))
	((and (symbolp (car pattern))
	      (equal (symbol-name (car pattern)) "*"))
	 (and (search (cadr pattern) string)
	      (string-match (cddr pattern)
			    (subseq string (+ (search (cadr pattern) string)
					      (length (cadr pattern)))))))
	((stringp (car pattern))
	 (and (zerop (search (car pattern) string))
	      (string-match (cdr pattern)
			    (subseq string (length (car pattern))))))
	(T nil)))


;;;; Quantity tables.

(define-unit numerical-slot-extraction
  (works-like 'prototypical-slot-extraction)
  (to-compute-value 'quantity-extract)
  (extraction-parser 'read-english-number))

(defun quantity-extract (unit slot)
  (let ((patterns (get-value slot 'pattern-string))
	(raw-slot (get-value unit (get-value slot 'source-slot))))
    (if (arlotje::failurep raw-slot) raw-slot
      (let ((string-to-match-with (strip-parenthetical-comments raw-slot)))
	(dolist (pattern patterns (arlotje::fail unit slot))
	  (let ((match (string-match pattern string-to-match-with)))
	    (when match
	      (return (let ((*quantity* (get-value slot 'measured-in-units)))
			(funcall (get-value slot 'extraction-parser) match))))))))))

(defvar *quantity* nil
  "This is the quantity unit one is parsing to.")

(defvar *atomic-numbers*
  '((one 1) (two 2) (three 3) (four 4) (five 5)
    (six 6) (seven 7) (eight 8) (nine 9) (ten 10)
    (eleven 11) (twelve 12) (thirteen 13) (fourteen 14) (fifteen 15)
    (sixteen 16) (seventeen 17) (eighteen 18) (nineteen 19)
    (twenty 20) (thirty 30) (forty 40) (fifty 50)
    (sixty 60) (seventy 70) (eighty 80) (ninety 90))
  "Numbers that stand by themselves.")

(defvar *multipliers*
  '((hundred 100) (thousand 1000) (million 1000000) (billion 1000000000)
    (score 20) (tenths .1) (hundreths .01) (thousandths .001)
    (millionths .000001) (billionths .000000001) (half .5) (fourth .25))
  "Numbers that stand by themselves.")

(defun symbol-equal (s1 s2)
  "Returns true if S1 and S2 have the same print name."
  (string-equal (symbol-name s1) (symbol-name s2)))

(defun read-number-token (string)
  "Reads a space or dash separated token from a string."
  (if (char= (char string 0) #\-) (values '- 1)
    (let* ((string (string-trim " " string))
	   (token-pos (position-if #'(lambda (x) (member x '(#\- #\Space))) string))
	   (token (read-from-string (subseq string 0 token-pos))))
      (values token token-pos))))

(defun cut-string (string start &optional end)
  "Returns a substring of STRING with dashes and spaces stripped off."
  (if (null start) ""
      (string-trim "- " (subseq string start end))))

(defun reading-english-number (string number-so-far)
  "Reads a number in cardinal format."
  (if (= (length string) 0) number-so-far
      (multiple-value-bind (token token-pos) (read-number-token string)
	(cond ((numberp token)
	       (reading-english-number (cut-string string token-pos) (+ number-so-far token)))
	      ((symbol-equal token '-) (- (reading-english-number (cut-string string token-pos) 0)))
	      ((symbol-equal token 'and) (+ number-so-far (reading-english-number (cut-string string token-pos) 0)))
	      ((and (assoc token *atomic-numbers* :test 'symbol-equal)
		    token-pos (char= (elt string token-pos) #\-))
	       (multiple-value-bind (next-token next-pos) (read-number-token (cut-string string token-pos))
		 (reading-english-number
		  (cut-string (cut-string string token-pos) next-pos)
		  (+ number-so-far
		     (cadr (assoc token *atomic-numbers* :test 'symbol-equal))
		     (cadr (assoc next-token *atomic-numbers* :test 'symbol-equal))))))
	      ((assoc token *atomic-numbers* :test 'symbol-equal)
	       (reading-english-number (cut-string string token-pos)
				       (+ number-so-far (cadr (assoc token *atomic-numbers* :test 'symbol-equal)))))
	      ((assoc token *multipliers* :test 'symbol-equal)
	       (reading-english-number (cut-string string token-pos)
				       (* number-so-far (cadr (assoc token *multipliers* :test 'symbol-equal)))))
	      ((get *quantity* (intern (symbol-name token) 'keyword))
	       (* (get *quantity* (intern (symbol-name token) 'keyword)) number-so-far))))))

(defun read-english-number (string)
  (reading-english-number (remove #\, string) 0))

(dolist (translation '((:kilometers :kilometer 1)
		       (:kilometers :km 1)
		       (:kilometers :meters 1/1000)
		       (:kilometers :meter 1/1000)
		       (:kilometers :m 1/1000)
		       (:kilometers :centimeters 1/1000)
		       (:kilometers :centimeter 1/1000)
		       (:kilometers :cm 1/1000)
		       (:kilometers :miles 1.609)
		       (:kilometers :mile 1.609)
		       (:kilometers :mi 1/1000)
		       (:kilometers :nautical-miles 1.852)
		       (:kilometers :nautical-mile 1.852)
		       (:kilometers :nm 1.852)
		       (:kilometers :feet (/ 1 3281.5))
		       (:kilometers :foot (/ 1 3281.5)
		       (:kilometers :ft (/ 1 3281.5)))))
  (setf (get (car translation) (cadr translation)) (caddr translation)))


