(module audit racket
  
  (provide (all-defined-out))
  
  (require "mdp.rkt" 
           "opt.rkt")

  ;; for debugging
  (define (printf-through form v)
    ;(printf form v)
    v)
  
  (define (unreachable-not-stationary nmdp state_i action_i log)
    (if (empty? log)
        false
        (let ([state_i+1 (first (first log))]
              [action_i+1 (second (first log))])
          (or (<= (trans-prob nmdp state_i action_i state_i+1) 0)
              (ormap (lambda (state-action-pair) 
                       (and (symbol=? state_i (first state-action-pair))
                            (not (symbol=? action_i (second state-action-pair)))))
                     log)
              (unreachable-not-stationary nmdp state_i+1 action_i+1 (rest log))))))
  
  ;; treats mdp as a NMDP, thus, mdp have better have nothing action 'N
  ;; log ::= ([q0 a1] [q1 a2] ... [qn-1 an]) 
  ;; the last state is missing as the algorithm doesn't actually use it
  ;; for a exclusivity rule (a only for p), returning true means a violation is found
  ;; for a prohabitive rule (a not for p), returning true means the policy was obeyed
  (define (impossible nmdp log)
    (or (ormap (lambda (state-action-pair)
                 (or (not (member (first state-action-pair) (states nmdp)))
                     (not (member (second state-action-pair) (actions nmdp)))))
               log)
        (unreachable-not-stationary nmdp 
                                    (first (first log))
                                    (second (first log)) 
                                    (rest log))))
     
  (define (Q-from-V mdp utilities state act)
    (+ (reward mdp state act)
       (* (discount-factor mdp)
          (summation (lambda (next-state)  
                       (* (trans-prob mdp state act next-state)
                          (utilities next-state)))
                     (states mdp)))))  
  
  (define (auditMDPapprox solveMDPapprox nmdp log)
    (or (impossible nmdp log)
        (let* ([bounds-functions (solveMDPapprox nmdp)]
               [lower-bounds (first bounds-functions)]
               [upper-bounds (second bounds-functions)])
          (ormap (lambda (state-action-pair)
                   (let* ([state (first state-action-pair)]
                          [action (second state-action-pair)]
                          [act-value-up (Q-from-V nmdp upper-bounds state action)])
                     (or (< act-value-up (lower-bounds state))
                         (and (<= act-value-up 0)
                              (not (symbol=? action 'N))))))          
                 log))))
  
  (define (auditMDPapprox-valueIter epsilon nmdp log)
    (auditMDPapprox (lambda (nmdp) (solve-mdp-bounds epsilon nmdp))
                    nmdp
                    log))
 
  (define (audit-exclusivity epsilon nmdp log)
    (if (auditMDPapprox-valueIter epsilon nmdp log)
        'violation-found
        'no-violation-found)) ; if considences are rare, then policy probably obeyed
  
  (define (audit-prohabitive epsilon nmdp log)
    (if (auditMDPapprox-valueIter epsilon nmdp log)
        'policy-obeyed
        'policy-might-be-violatoned)) ; if considences are rare, then policy probably violated
  
  (define (audit-policy epsilon policy nmdp log)
    (cond [(symbol=? policy 'exclusivity) (audit-exclusivity epsilon nmdp log)]
          [(symbol=? policy 'prohabitive) (audit-prohabitive epsilon nmdp log)]
          [else (error "Unknown policy: " policy)]))
  
  
  )
