;;;; ;;Issues ;; ;;- see the haskell benchmark suite (? name ?) ;; ;;- missing `nice` argument checking (module micro-stats (;export statistics-sets statistics-set basic-statistics generate-statistics generate-statistics-alist variance standard-deviation arithmetic-mean mean median mode percentile ;#; ;need to validate chi-square) (import scheme (chicken base) (chicken type) (chicken syntax) (chicken flonum) (chicken foreign) (only (chicken format) format) (only (srfi 1) first list-copy every) (prefix sequences seq:) (prefix (sequences utils misc) seq:) (prefix (sequences utils sort) seq:) (prefix (sequences utils sample) seq:)) (include-relative "micro-stats-types") (: statistics-sets (#!optional alist -> alist)) (: statistics-set (stats-set-id -> alist)) (: basic-statistics (seq -> (vector-of real))) (: generate-statistics (seq #!optional (or boolean symbol) -> (or false stats-alist))) (: generate-statistics-alist (seq (or false list) #!optional (vector-of real) -> (or false stats-alist))) (: generate-statistics-sublist (seq list #!optional vector -> (pair symbol *))) (: variance (seq real -> real)) (: standard-deviation (seq #!optional real -> real real)) (: arithmetic-mean (seq -> real)) (: mean (seq -> real real real)) (: mode (seq #!optional binary-predicate -> real)) (: median (seq #!optional binary-predicate -> real)) (: percentile (seq #!optional real binary-predicate -> real)) ; Experimental (: chi-square (seq (or real seq) #!optional boolean -> real)) (: chi-square-component (real real -> real)) (: yates-chi-square-component (real real -> real)) (: checked-statistics-set-id->symbol (* -> symbol)) ;;; ;from miscmacros (define-syntax define-parameter (syntax-rules () ((define-parameter name value guard) (define name (make-parameter value guard))) ((define-parameter name value) (define name (make-parameter value))) ((define-parameter name) (define name (make-parameter (void)))))) ;from moremacros (import-for-syntax (only (chicken base) symbol-append)) (define-syntax checked-guard (er-macro-transformer (lambda (frm rnm cmp) (##sys#check-syntax 'checked-guard frm '(_ symbol symbol . _)) (let ((_lambda (rnm 'lambda)) (_let (rnm 'let)) (_arg (rnm 'arg)) (?locnam (cadr frm)) (?typnam (caddr frm)) (?body (cdddr frm)) ) (let ((chknam (symbol-append 'check- (strip-syntax ?typnam)))) ;inject `(,_lambda (,_arg) (,chknam ',?locnam ,_arg) (,_let ((obj ,_arg)) ,@?body obj ) ) ) ) ) ) ) (define-syntax define-checked-parameter (syntax-rules () ((define-checked-parameter ?name ?init ?typnam ?body0 ...) (define-parameter ?name ?init (checked-guard ?name ?typnam ?body0 ...)) ) ) ) ;from check-errors (define (make-bad-argument-type-message #!optional argnam) (if (not argnam) "bad argument type" (format #f "bad `~A' argument type" argnam)) ) (define (make-type-name-tobe-message typnam) (format #f "not a ~A" typnam) ) (define (make-error-type-message typnam #!optional argnam) (string-append (make-bad-argument-type-message argnam) " - " (make-type-name-tobe-message typnam)) ) (define (alist? obj) (and (list? obj) (every pair? obj))) (define (error-alist loc obj #!optional argnam) (import (only (chicken string) conc)) (##sys#signal-hook #:type-error loc (make-error-type-message "alist" argnam) obj) ) (define (check-alist loc obj . args) (unless (alist? obj) (error-alist loc obj (let ((tmp args)) (if (null? tmp) #f (car tmp)))) ) obj ) ;;; ;; (define NORMAL-STATISTICS '( size max min arithmetic-mean (mean . (arithmetic-mean)) standard-deviation (sd . (standard-deviation)) (sigma . (standard-deviation)) )) (define VERBOSE-STATISTICS (append NORMAL-STATISTICS '( (95th . (percentile 95)) harmonic-mean geometric-mean median mode variance (var . (variance)) (sigma^2 . (variance)) ))) (define INITIAL-STATISTICS-SETS `( (normal . ,NORMAL-STATISTICS) (verbose . ,VERBOSE-STATISTICS) )) (define-checked-parameter statistics-sets INITIAL-STATISTICS-SETS alist) (define (checked-statistics-set-id->symbol obj) (case obj ((#f) 'normal) ((#t) 'verbose) (else (if (symbol? obj) obj (error 'statistics-set "not a valid stat-set-id" obj)))) ) (define (statistics-set stat-set-id) (let ((key (checked-statistics-set-id->symbol stat-set-id))) (alist-ref key (statistics-sets) eq?) ) ) ;; (define-constant ARITHMETIC-IDX 0) (define-constant HARMONIC-IDX 1) (define-constant GEOMETRIC-IDX 2) (define-constant SIGMA-IDX 3) (define-constant SIGMA^2-IDX 4) ;; (define (basic-statistics runs) (let*-values (((m h g) (mean runs)) ((s v) (standard-deviation runs m)) ) (vector m h g s v) ) ) ;; (define (generate-statistics runs #!optional stat-set-id) (let ((basics (basic-statistics runs)) (funcs (statistics-set stat-set-id)) ) (generate-statistics-alist runs funcs basics) ) ) (define (generate-statistics-alist runs funcs #!optional (basics (basic-statistics runs))) (and (list? funcs) (map (cut generate-statistics-sublist runs <> basics) funcs) ) ) (define (generate-statistics-sublist runs func #!optional (basics (basic-statistics runs))) (let* ((parameterized? (pair? func)) (func-key (if parameterized? (car func) func)) (func-name (if parameterized? (cadr func) func-key)) (func-params (if parameterized? (cddr func) '())) ) `(,func-key . ,(case func-name ((size) (seq:size runs) ) ((max) (seq:foldl max minimum-flonum runs) ) ((min) (seq:foldl min maximum-flonum runs) ) ((arithmetic-mean) (vector-ref basics ARITHMETIC-IDX) ) ((standard-deviation) (vector-ref basics SIGMA-IDX) ) ((percentile) (apply percentile runs func-params) ) ((harmonic-mean) (vector-ref basics HARMONIC-IDX) ) ((geometric-mean) (vector-ref basics GEOMETRIC-IDX) ) ((median) (median runs) ) ((mode) (mode runs) ) ((variance) (vector-ref basics SIGMA^2-IDX) ) (else (error 'statistic "unknown function" func-name)))) ) ) ;; (define (percentile runs #!optional (p 95) (less? <)) ; (assert (and (real? p) (<= 0 p) (< p 100)) 'percentile "not in [0 .. 100)" p) ; (if (seq:empty? runs) +nan.0 (let* ((ordered (seq:sort runs less?)) (size (seq:size ordered)) ;size-of vector (rank (round (* size (/ p 100)))) (idx (max 0 (- rank 1))) ) ;idx = 0 => min( max( => ordered(idx) (seq:elt ordered idx) ) ) ) ;; (define (acc-update! acc i f) (set! (vector-ref acc i) (f (vector-ref acc i)))) ;seq => arithmetic harmonic geometric (define (mean runs) (let ((size (seq:size runs))) (if (zero? size) (values 0 0 0) (let* ((n size) (r (/ 1 n)) (acc (vector 0 0 1)) (accum! (lambda (acc value) ;minimize the acccumulated value (acc-update! acc GEOMETRIC-IDX (lambda (run) (* run (expt value r)))) (acc-update! acc HARMONIC-IDX (lambda (run) (if (zero? value) run (+ run (/ 1 value))))) (acc-update! acc ARITHMETIC-IDX (lambda (run) (+ run (/ value n)))) acc ) ) (acc (seq:foldl accum! acc runs)) ) (values (vector-ref acc ARITHMETIC-IDX) (let ((h (vector-ref acc HARMONIC-IDX))) (if (zero? h) h (/ n h)) ) (vector-ref acc GEOMETRIC-IDX) ) ) ) ) ) ;; (define (median runs #!optional (less? <)) (let* ((ordered (seq:sort runs less?)) (size (seq:size ordered)) ;size-of vector (median (- (round (/ (add1 size) 2)) 1)) ) (cond ((seq:empty? runs) 0.0) ((exact? median) (seq:elt ordered median)) (else (let ((i (inexact->exact (floor median)))) (/ (+ (seq:elt ordered i) (seq:elt ordered (add1 i))) 2) ) ) ) ) ) ;; (define (mode runs #!optional (eqal? =)) (define (occur> a b) (> (cdr a) (cdr b))) (if (seq:empty? runs) 0.0 (let ((top-bin (first (seq:sort! (seq:histogram runs eqal?) occur>)))) (car top-bin)) ) ) ;; ;seq => arithmetic harmonic geometric (define (arithmetic-mean runs) (let-values (((m h g) (mean runs))) m)) ;; ;seq => sigma sigma^2 (define (standard-deviation runs #!optional (m (arithmetic-mean runs))) (let ((var (variance runs m))) (values (sqrt var) var)) ) ;; (define (variance runs m) (let ((siz (seq:size runs))) (if (<= 0 siz 1) 0.0 (let ((acc (seq:foldl (lambda (acc elm) (+ acc (expt (- elm m) 2))) 0 runs))) (/ acc (sub1 siz)) ) ) ) ) ;; (define (yates-chi-square-component o e) (let ((r (- (abs (- o e)) 0.5))) (/ (* r r) e) ) ) (define (chi-square-component o e) (let ((r (- o e))) (/ (* r r) e) ) ) (define (chi-components component observed expected) (define (xpct-elt-comp s i) (component (seq:elt s i) (seq:elt expected (seq:index i))) ) (if (seq:sequence? expected) (seq:smap* #() xpct-elt-comp observed) (seq:smap #() (cut component <> expected) observed) ) ) (define (chi-square observed expected #!optional yates?) ; (assert (seq:sequence? observed) 'chi-square "observed not a sequence" observed expected yates?) (assert (or (seq:sequence? expected) (real? expected)) 'chi-square "expected not a sequence or real" observed expected yates?) (when (and (seq:sequence? observed) (seq:sequence? expected)) (assert (= (seq:size expected) (seq:size observed)) 'chi-square "length observed not same length expected" observed expected yates?) ) ; (let ((component (if yates? yates-chi-square-component chi-square-component))) (seq:reduce + 0 (chi-components component observed expected)) ) ) ) ;module micro-stats