;;; -*- Mode:Common-Lisp; Package:(RSTATS); Base:10 -*-

;;;  Rich Acuff, Stanford KSL, Aug-87
;;;
;;;  Tool like Sun's perfmeter.  Uses the RSTATS RPC calls to get (and
;;;  give) information about remote systems' performance statistics.
;;;
;;;  To be done:
;;;   - Error handlers in all processes
;;;   - Indicate when remote machines are unreachable
;;;   - Better strip charts and other meters

;;;----------------------------------------------------------------------
;;;
;;;  Constants and declarations

(in-package 'rstats)
(require 'graphical-value-monitors)

(defvar *stats* nil "A plist mapping host objects to STATS objects.")

(defstruct stats
  (last-updated (get-internal-real-time))	;When the stats were last updated
  (cpu-%-used 0)		      ;% of CPU used since last update
  (last-cpu-used 0)		      ;Total non-idle CPU for last update
  (last-cpu-total 0)		      ;Total CPU atlast update
  (disk-xfer/sec 0)		      ;Current disk transfers per second
  (last-disk-total 0)		      ;Total disk transfers at last update
  (paging-rate 0)		      ;Current pages per second paged
  (last-paging-total 0)		      ;Total pages paged atlast update
  (swap/sec 0)			      ;Current "swaps" (?) per second
  (last-swapping-total 0)	      ;Total "swaps" at last update
  (intrs/sec 0)			      ;Current device interrupts per second
  (last-intr-total 0)		      ;Total device interrupts at last update
  (pkts/sec 0)			      ;Current network packets per second (output?)
  (last-pkt-total 0)		      ;Total network packets at last update
  (error/sec 0)			      ;Current network errors per second
  (last-error-total 0)		      ;Total input/output network errors at last update
  (coll/sec 0)			      ;Current network collisions per second
  (last-collision-total 0)	      ;Total collisions at last update
  (switch/sec 0)		      ;Current context switches per second
  (last-switch-total 0)		      ;Total context switches at last update
  (load-ave 0)			      ;Current one min ave # runnable procs
  )

(defconstant Rstats-Prog 100001)
(defconstant Rstats-Vers 2)
(defconstant Rstats-Stats-Proc 1)

(defstruct statsswtch
  "Structure returned from an RSTAT call as defined in Sun's
   /usr/include/rpcsvc/rstat.h"
  time						;internal time this data was obtained
  (cpu-user 0 :type integer)
  (cpu-nice 0 :type integer)
  (cpu-sys  0 :type integer)
  (cpu-idle 0 :type integer)
  (disk-xfer-0 0 :type integer)
  (disk-xfer-1 0 :type integer)
  (disk-xfer-2 0 :type integer)
  (disk-xfer-3 0 :type integer)
  (v-pgpgin 0 :type :integer)
  (v-pgpgout 0 :type :integer)
  (v-pswpin 0 :type :integer)
  (v-pswpout 0 :type :integer)
  (v-intr 0 :type :integer)
  (if-ipackets 0 :type :integer)
  (if-ierrors 0 :type :integer)
;  (if-opackets 0 :type :integer)     ;; I couldn't find output pkts in the RSTAT data
  (if-oerrors 0 :type :integer)
  (if-collisions 0 :type :integer)
  (v-swtch 0 :type :integer)
  (avenrun-0 0 :type :integer)
  (avenrun-1 0 :type :integer)
  (avenrun-2 0 :type :integer)
  (boottime 0 :type :integer)
  )

(defresource statsswtchs () :constructor (make-statsswtch))

;;;----------------------------------------------------------------------
;;;
;;;  The primary interface

;;;Edited by Acuff                 16 Nov 87  12:39
(defun watch (host &optional (statistic :cpu) maximum)
  "Display a picture of the time varying status of STATISTIC on HOST in
   a Graphical Valu Monitor.  Statistic is one of:

      :CPU  - Percentage of CPU used
      :DISK - Disk transfers per second
      :PAGE - Paging activity per second
      :SWAP - Swapping activity per second ???
      :INTR - Device interrupts per second
      :PKTS - Ethernet packets per second
      :ERRS - Ethernet errors per second
      :COLLS - Ethernet collisions per second
      :CNTXT - Context switches per second
      :LOAD - 1 minute average of the number of runnable processes

   MAXIMUM is the max value for the monitor.
"
  (check-arg statistic
	     (member statistic '(:cpu :disk :page :swap :intr :pkts :errs
				 :colls :cntxt :load))
	     "an RSTAT statistic to watch")
  (let ((stats (make-stats)))
    (make-instance 'w:graphical-monitor-window
		   :update-fn #'(lambda ()
				  (update-stats host stats)
				  (case statistic
				    (:cpu (stats-cpu-%-used stats))
				    (:disk (stats-disk-xfer/sec stats))
				    (:page (stats-paging-rate stats))
				    (:swap (stats-swap/sec stats))
				    (:intr (stats-intrs/sec stats))
				    (:pkts (stats-pkts/sec stats))
				    (:errs (stats-error/sec stats))
				    (:colls (stats-coll/sec stats))
				    (:cntxt (stats-switch/sec stats))
				    (:load (stats-load-ave stats))))
		 :name (format nil "~A ~A" host statistic)
		 :delta? nil
		 :max (or maximum
			  (case statistic
			    (:cpu 100)
			    (:disk 50)
			    ((:page :swap :intr :cntxt) 30)
			    ((:errs :colls) 4)
			    (:load 3)
			    (:pkts 50)))
		 :interval 5
		 :expose-p t
		 )
    )
  )

;;;----------------------------------------------------------------------
;;;
;;;  Gathering code

(defun start-gathering-stats (host)
  "Set up a process to gather RSTAT type system statistics for host HOST."
  (setf host (net:parse-host host))
  (let ((stats (make-stats)))
    (putprop *stats* stats host)
    (process-run-function (format nil "~A Stats" host) #'gather-stats host stats)
    )
  )

(defun gather-stats (host stats)
  "Gather RSTAT type stats for host HOST into STATS once a second."
  (update-stats host stats)
  (sleep 1)
  )

(defun update-stats (host stats)
  "Update the RSTATS stats in STATS for HOST."
  (using-resource (ss statsswtchs)
    (callrpc host Rstats-Prog Rstats-Vers Rstats-Stats-Proc
	     :xdr-void nil
	     'xdr-statsswtch ss)
    (let ((sec (/ (- (statsswtch-time ss) (stats-last-updated stats))
		  (float internal-time-units-per-second))))
      (setf (stats-last-updated stats) (get-internal-real-time))
      (update-cpu stats ss)
      (update-disk-xfers stats ss sec)
      (update-paging stats ss sec)
      (update-swapping stats ss sec)
      (update-interrupts stats ss sec)
      (update-packets stats ss sec)
      (update-errors stats ss sec)
      (update-collisions stats ss sec)
      (update-context-switches stats ss sec)
      (update-load-ave stats ss sec)
      )
    )
  )

(defun xdr-statsswtch (stream &optional statsswtch)
  "Read the STATSSWTCH structure from the XDR stream STREAM."
  (rpc:default-and-resolve statsswtch statsswtch make-statsswtch)
  (macrolet ((decode-integer (op)
		`(send stream :xdr-integer (locf (,op statsswtch))))
	     (decode-unsigned (op)
		`(send stream :xdr-unsigned (locf (,op statsswtch)))))
    (setf (statsswtch-time statsswtch) (get-internal-real-time))
    (decode-integer statsswtch-cpu-user)
    (decode-integer statsswtch-cpu-nice)
    (decode-integer statsswtch-cpu-sys)
    (decode-integer statsswtch-cpu-idle)
    (decode-integer statsswtch-disk-xfer-0)
    (decode-integer statsswtch-disk-xfer-1)
    (decode-integer statsswtch-disk-xfer-2)
    (decode-integer statsswtch-disk-xfer-3)
    (decode-unsigned statsswtch-v-pgpgin)
    (decode-unsigned statsswtch-v-pgpgout)
    (decode-unsigned statsswtch-v-pswpin)
    (decode-unsigned statsswtch-v-pswpout)
    (decode-unsigned statsswtch-v-intr)
    (decode-integer statsswtch-if-ipackets)
    (decode-integer statsswtch-if-ierrors)
    (decode-integer statsswtch-if-oerrors)
    (decode-integer statsswtch-if-collisions)
    (decode-unsigned statsswtch-v-swtch)
    (decode-integer statsswtch-avenrun-0)
    (decode-integer statsswtch-avenrun-1)
    (decode-integer statsswtch-avenrun-2)
    (decode-integer statsswtch-boottime)
    (decode-integer statsswtch-boottime)
    )
  statsswtch
  )

(defun update-cpu (st ss)
  "Update the CPU utilization info in ST from the data in the statsswtch SS."
  (let* ((used-total (+ (statsswtch-cpu-user ss) (statsswtch-cpu-nice ss)
			(statsswtch-cpu-sys ss)))
	 (new-used (- used-total (stats-last-cpu-used st)))
	 (total (+ used-total (statsswtch-cpu-idle ss)))
	 temp)
    ;; Percentage utilized
    (setf (stats-cpu-%-used st)
	  (if (zerop (setf temp (- total (stats-last-cpu-total st))))
	      0
	      (round (* new-used 100) temp)))
    (setf (stats-last-cpu-used st) used-total)
    (setf (stats-last-cpu-total st) total)
    )
  )

(defun update-disk-xfers (st ss sec)
  "Update the disk transfer count info in ST from the data in the statsswtch SS.
   SEC is the number of seconds between old data in ST and new in SS."
  (let ((new-total (+ (statsswtch-disk-xfer-0 ss) (statsswtch-disk-xfer-1 ss)
		      (statsswtch-disk-xfer-2 ss) (statsswtch-disk-xfer-3 ss))))
    (setf (stats-disk-xfer/sec st)
	  (round (- new-total (stats-last-disk-total st)) sec))
    (setf (stats-last-disk-total st) new-total)
    )
  )

(defun update-paging (st ss sec)
  "Update the paging info (pages/sec) in ST from the data in the statsswtch SS.
   SEC is the number of seconds between old data in ST and new in SS."
  (let ((total (+ (statsswtch-v-pgpgin ss) (statsswtch-v-pgpgout ss))))
    (setf (stats-paging-rate st)
	  (round (- total (stats-last-paging-total st)) sec))
    (setf (stats-last-paging-total st) total)
    )
  )

(defun update-swapping (st ss sec)
  "Update the swapping info (??/sec) in ST from the data in the statsswtch SS.
   SEC is the number of seconds between old data in ST and new in SS."
  (let ((total (+ (statsswtch-v-pswpin ss) (statsswtch-v-pswpout ss))))
    (setf (stats-swap/sec st)
	  (round (- total (stats-last-swapping-total st)) sec))
    (setf (stats-last-swapping-total st) total)
    )
  )
  
(defun update-interrupts (st ss sec)
  "Update the interrupt info (??/sec) in ST from the data in the statsswtch SS.
   SEC is the number of seconds between old data in ST and new in SS."
  (setf (stats-intrs/sec st)
	(round (- (statsswtch-v-intr ss) (stats-last-intr-total st)) sec))
  (setf (stats-last-intr-total st) (statsswtch-v-intr ss))
  )

(defun update-packets (st ss sec)
  "Update the ether info (pkts/sec) in ST from the data in the statsswtch SS.
   SEC is the number of seconds between old data in ST and new in SS."
  (setf (stats-pkts/sec st)
	(round (round (- (statsswtch-if-ipackets ss) (stats-last-pkt-total st)) sec)))
  (setf (stats-last-pkt-total st) (statsswtch-if-ipackets ss))
  )

(defun update-errors (st ss sec)
  "Update the ether error info (error/sec) in ST from the data in the statsswtch SS.
   SEC is the number of seconds between old data in ST and new in SS."
  (let ((total (+ (statsswtch-if-ierrors ss) (statsswtch-if-oerrors ss))))
    (setf (stats-error/sec st)
	  (round (- total (stats-last-error-total st)) sec))
    (setf (stats-last-error-total st) total)
    )
  )

(defun update-collisions (st ss sec)
  "Update the ether collision info (coll/sec) in ST from the data in the
   statsswtch SS.  SEC is the number of seconds between old data in ST and
   new in SS."
  (setf (stats-coll/sec st)
	(round (- (statsswtch-if-collisions ss) (stats-last-collision-total st)) sec))
  (setf (stats-last-collision-total st) (statsswtch-if-collisions ss))
  )

(defun update-context-switches (st ss sec)
  "Update the context switch info (swtch/sec) in ST from the data in the
   statsswtch SS.  SEC is the number of seconds between old data in ST and
   new in SS."
  (setf (stats-switch/sec st)
	(round (- (statsswtch-v-swtch ss) (stats-last-switch-total st)) sec))
  (setf (stats-last-switch-total st) (statsswtch-v-swtch ss))
  )

(defun update-load-ave (st ss sec)
  "Update the load average info (njobs/min) in ST from the data in the
   statsswtch SS.  SEC is the number of seconds between old data in ST and
   new in SS, but is unused."
  (declare (ignore sec))
  (setf (stats-load-ave st) (/ (statsswtch-avenrun-0 ss) 256.0))
  )

;;;----------------------------------------------------------------------
;;;
;;;  Server

(DEFMACRO my-READ-global-TIME-METER (name)
  (LET ((LOW (INTERN (STRING-APPEND name "-LOW") "SYS"))
	(HIGH (INTERN (STRING-APPEND name "-HIGH") "SYS")))
    `(DPB ,HIGH (BYTE (1- (BYTE-SIZE sys:%%Q-Pointer))
		      (1- (BYTE-SIZE sys:%%Q-Pointer))) ,LOW)))

(defun rstats-server (&optional ignore)
  "Server function for the RSTATS procedure of the RSTATS RPC program.
   Gets no arg and returns a STATSWTCH structure."
  (let ((ss (make-statsswtch)))
    ;; Borrowed from SYS:TIME-STATS
    (setf (statsswtch-cpu-user ss)
	  (floor (my-read-global-time-meter sys:global-process-total-time)
		 1000000/60))
    (setf (statsswtch-cpu-nice ss) 0)
    (setf (statsswtch-cpu-sys ss) 0)
    (setf (statsswtch-cpu-idle ss)		 ;Idle = total - used (sort of)
	  (time-difference
	    (time-difference (time) sys:last-time-global-stats-reset)
	    (floor (my-read-global-time-meter sys:global-process-total-time)
		   1000000/60))) 
    ;; Borrow this from GRAPHICAL-VALUE-MONITORS.  Pretend we've only
    ;; got one disk.
    (setf (statsswtch-disk-xfer-0 ss) (si:total-disk-ops))
    (setf (statsswtch-disk-xfer-1 ss) 0)
    (setf (statsswtch-disk-xfer-2 ss) 0)
    (setf (statsswtch-disk-xfer-3 ss) 0)
    ;; Net packets.  Since I can't find a field for output packets,
    ;; we'll put them both in the input packet count
    (setf (statsswtch-if-ipackets ss)
	  (+ (net:pkts-received) (net:pkts-transmitted)))
    ;; Collisions
    (setf (statsswtch-if-collisions ss)
	  (loop for ctl on net:controller-list
		when (typep ctl 'ethernet:ethernet-controller)
		sum (send ctl :collision-count)))
    ;; Load average
    (setf (statsswtch-avenrun-0 ss) (round (* (load-average) 256)))
    ;; Skip paging, swapping, interrupts, and boottime
    ss
    )
  )

(rpc:registerrpc Rstats-Prog Rstats-Vers Rstats-Stats-Proc
		 'rstats-server :xdr-void 'xdr-statsswtch)

;;;----------------------------------------------------------------------
;;;
;;;  Load Average

(defvar *load-average-resolution* 60
  "How many samples a minute to take of the number of runnable processes
   to compute the load average.  Use SET-LOAD-AVERAGE-RESOLUTION to
   change.")

(defvar *load-average-db* (make-array *load-average-resolution*)
  "Place to keep load averge data")

(defvar *load-avergae-db-ptr* 0
   "Index into *LOAD-AVERAGE-DB* of next-to-be-updated slot.")

(defvar *load-average-interval* (/ *load-average-resolution* 60)
  "Number of seconds between updates to *LOAD-AVERAGE-DB*.  Use
   SET-LOAD-AVERAGE-RESOLUTION to change.")

(defun set-load-average-resolution (new-res)
  "Change how many samples of the number of runnable processes are kept
   (and therefore the interval between taking them) to compute the one
   minute load average."
  (check-arg new-res (and (integerp new-res) (plusp new-res))
	     "a positive integer")
  (let ((new-db (make-array new-res)))
    ;; Copy the old data
    (do ((iold *load-avergae-db-ptr* (1- iold))
	 (inew 0 (1+ inew)))
	((or (= inew new-res)	      ;ran out of new space
	     (> inew *load-average-resolution*))      ;ran out of old data
	 (setf *load-avergae-db-ptr*
	       (if (= inew new-res) 0 inew)))
      (setf (aref new-db inew) (aref *load-average-db* iold))
      (when (zerop iold) (setf iold (1- *load-average-resolution*)))
      )
    (setf *load-average-resolution* new-res)
    (setf *load-average-db* new-db)
    (setf *load-average-interval* (/ *load-average-resolution* 60))
    )
  )

(defun keep-load-average ()
  (loop
    (progn
      (update-load-average-db)
      (sleep *load-average-interval*)
      )
    )
  )

(defun update-load-average-db ()
  (when (= *load-avergae-db-ptr* *load-average-resolution*)
    (setf *load-avergae-db-ptr* 0))
  (setf (aref *load-average-db* *load-avergae-db-ptr*)
	(1- (number-of-runnable-procs)))
  (incf *load-avergae-db-ptr*)
  )

(defun number-of-runnable-procs ()
  (let ((sum 0))
    (dolist (proc sys:active-processes sum)
      (unless (car proc) (return-from number-of-runnable-procs sum))
      (when (apply (second proc) (third proc))
	(incf sum))
      )
    )
  )

;;;Edited by Acuff                 16 Nov 87  12:39
(defun load-average ()
  "The average number of runnable processes for the last minute."
  (/ (reduce #'+ *load-average-db*) 60.0)
  )

(defun start-rstat-server ()
  (process-run-function '(:name "Update Load Average" :priority 2)
			'keep-load-average))
