;;;; ;;Issues ;; ;;- see the haskell benchmark suite (? name ?) ;; ;;- missing `nice` argument checking (module micro-stats (;export statistics-sets statistics-set basic-statistics generate-statistics generate-statistics-alist variance standard-deviation absolute-deviation arithmetic-mean mean median mode quantile percentile interquartile-range five-number-summary kurtosis skewness covariance correlation ;#; ;need to validate chi-square) (import scheme (chicken base) (chicken type) (chicken syntax) (chicken fixnum) (chicken flonum) (chicken foreign) (only (chicken format) format) (only (srfi 1) first list-copy every fold) (prefix sequences seq:) (prefix (sequences utils misc) seq:) (prefix (sequences utils sort) seq:) (prefix (sequences utils sample) seq:)) (include-relative "micro-stats-types") (: statistics-sets (#!optional alist -> alist)) (: statistics-set (stats-set-id -> alist)) (: basic-statistics (seq -> (vector-of real))) (: generate-statistics (seq #!optional (or boolean symbol) -> (or false stats-alist))) (: generate-statistics-alist (seq (or false list) #!optional (vector-of real) -> (or false stats-alist))) (: generate-statistics-sublist (seq list #!optional vector -> (pair symbol *))) (: variance (seq #!optional (or boolean real) -> real)) (: standard-deviation (seq #!optional (or boolean real) -> real real)) (: stddev (seq #!optional (or boolean real) -> real)) (: absolute-deviation (seq -> real)) (: arithmetic-mean (seq -> real)) (: mean (seq -> real real real)) (: mode (seq -> real)) (: median (seq -> real)) (: quantile (seq real -> real)) (: percentile (seq #!optional real -> real)) (: interquartile-range (seq -> real)) (: five-number-summary (seq -> real real real real real)) (: kurtosis (seq #!optional (or boolean real) -> real)) (: skewness (seq #!optional (or boolean real) -> real)) (: covariance (seq seq #!optional (or boolean real) -> real)) (: correlation (seq seq #!optional (or boolean real) -> real)) (: chi-square (seq (or real seq) #!optional boolean -> real)) ;;; ;from miscmacros (define-syntax define-parameter (syntax-rules () ((define-parameter name value guard) (define name (make-parameter value guard))) ((define-parameter name value) (define name (make-parameter value))) ((define-parameter name) (define name (make-parameter (void)))))) ;from moremacros (import-for-syntax (only (chicken base) symbol-append)) (define-syntax checked-guard (er-macro-transformer (lambda (frm rnm cmp) (##sys#check-syntax 'checked-guard frm '(_ symbol symbol . _)) (let ((_lambda (rnm 'lambda)) (_let (rnm 'let)) (_arg (rnm 'arg)) (?locnam (cadr frm)) (?typnam (caddr frm)) (?body (cdddr frm)) ) (let ((chknam (symbol-append 'check- (strip-syntax ?typnam)))) ;inject `(,_lambda (,_arg) (,chknam ',?locnam ,_arg) (,_let ((obj ,_arg)) ,@?body obj ) ) ) ) ) ) ) (define-syntax define-checked-parameter (syntax-rules () ((define-checked-parameter ?name ?init ?typnam ?body0 ...) (define-parameter ?name ?init (checked-guard ?name ?typnam ?body0 ...)) ) ) ) ;from check-errors (define (make-bad-argument-type-message #!optional argnam) (if (not argnam) "bad argument type" (format #f "bad `~A' argument type" argnam)) ) (define (make-type-name-tobe-message typnam) (format #f "not a ~A" typnam) ) (define (make-error-type-message typnam #!optional argnam) (string-append (make-bad-argument-type-message argnam) " - " (make-type-name-tobe-message typnam)) ) (define (alist? obj) (and (list? obj) (every pair? obj))) (define (error-alist loc obj #!optional argnam) (import (only (chicken string) conc)) (##sys#signal-hook #:type-error loc (make-error-type-message "alist" argnam) obj) ) (define (check-alist loc obj . args) (unless (alist? obj) (error-alist loc obj (let ((tmp args)) (if (null? tmp) #f (car tmp)))) ) obj ) ;;; ;; ;let actual float in data taint the result (define VERY-SMALL-EXACT (inexact->exact minimum-flonum)) (define VERY-LARGE-EXACT (inexact->exact maximum-flonum)) (define NORMAL-STATISTICS '( size max min arithmetic-mean (mean . (arithmetic-mean)) standard-deviation (sd . (standard-deviation)) (sigma . (standard-deviation)) )) (define VERBOSE-STATISTICS (append NORMAL-STATISTICS '( (95th . (percentile 95)) harmonic-mean geometric-mean median mode variance (var . (variance)) (sigma^2 . (variance)) ))) (define FULL-STATISTICS (append VERBOSE-STATISTICS '( kurtosis skewness interquartile-range five-number-summary ))) (define INITIAL-STATISTICS-SETS `( (normal . ,NORMAL-STATISTICS) (verbose . ,VERBOSE-STATISTICS) (full . ,FULL-STATISTICS) )) (define-checked-parameter statistics-sets INITIAL-STATISTICS-SETS alist) (define (checked-statistics-set-id->symbol obj) (case obj ((#f) 'normal) ((#t) 'verbose) (else (if (symbol? obj) obj (error 'statistics-set "not a valid stat-set-id" obj)))) ) (define (statistics-set stat-set-id) (let ((key (checked-statistics-set-id->symbol stat-set-id))) (alist-ref key (statistics-sets) eq?) ) ) ;; (define-constant ARITHMETIC-IDX 0) (define-constant HARMONIC-IDX 1) (define-constant GEOMETRIC-IDX 2) (define-constant SIGMA-IDX 3) (define-constant SIGMA^2-IDX 4) ;; (define (basic-statistics runs) (let*-values (((m h g) (mean runs)) ((s v) (standard-deviation runs m)) ) (vector m h g s v) ) ) ;; (define (generate-statistics runs #!optional stat-set-id) (let ((basics (basic-statistics runs)) (funcs (statistics-set stat-set-id)) ) (generate-statistics-alist runs funcs basics) ) ) (define (generate-statistics-alist runs funcs #!optional (basics (basic-statistics runs))) (and (list? funcs) (map (cut generate-statistics-sublist runs <> basics) funcs) ) ) (define (generate-statistics-sublist runs func #!optional (basics (basic-statistics runs))) (let* ((parameterized? (pair? func)) (func-key (if parameterized? (car func) func)) (func-name (if parameterized? (cadr func) func-key)) (func-params (if parameterized? (cddr func) '())) ) `(,func-key . ,(case func-name ((size) (seq:size runs) ) ((max) (seq:foldl max VERY-SMALL-EXACT runs) ) ((min) (seq:foldl min VERY-LARGE-EXACT runs) ) ((arithmetic-mean) (vector-ref basics ARITHMETIC-IDX) ) ((standard-deviation) (vector-ref basics SIGMA-IDX) ) ((percentile) (apply percentile runs func-params) ) ((harmonic-mean) (vector-ref basics HARMONIC-IDX) ) ((geometric-mean) (vector-ref basics GEOMETRIC-IDX) ) ((median) (median runs) ) ((mode) (mode runs) ) ((variance) (vector-ref basics SIGMA^2-IDX) ) (else (error 'statistic "unknown function" func-name)))) ) ) ;; (define (acc-update! acc i f) (set! (vector-ref acc i) (f (vector-ref acc i)))) ;seq => arithmetic harmonic geometric (define (mean runs) (let ((size (seq:size runs))) (if (fx= 0 size) (values +nan.0 +nan.0 +nan.0) (let* ((n size) (r (/ 1 n)) (acc (vector 0 0 1)) (accum! (lambda (acc value) ;minimize the acccumulated value (acc-update! acc GEOMETRIC-IDX (lambda (run) (let ((x (expt value r))) (* run (if (exact? value) (inexact->exact x) x))))) (acc-update! acc HARMONIC-IDX (lambda (run) (if (zero? value) run (+ run (/ 1 value))))) (acc-update! acc ARITHMETIC-IDX (lambda (run) (+ run (/ value n)))) acc ) ) (acc (seq:foldl accum! acc runs)) ) (values (vector-ref acc ARITHMETIC-IDX) (let ((h (vector-ref acc HARMONIC-IDX))) (if (zero? h) h (/ n h))) (vector-ref acc GEOMETRIC-IDX) ) ) ) ) ) ;; (define (median runs) (if (seq:empty? runs) +nan.0 (let* ((ordered (seq:sort runs <)) (mid (- (/ (fx+ (seq:size ordered) 1) 2) 1)) ) (if (integer? mid) (seq:elt ordered mid)) (/ (+ (seq:elt ordered (floor mid)) (seq:elt ordered (ceiling mid))) 2) ) ) ) ;; (define (mode runs) (define (occur> a b) (> (cdr a) (cdr b))) (if (seq:empty? runs) +nan.0 (let* ((srtd (seq:sort! (seq:histogram runs =) occur>)) (top-bin (seq:elt srtd 0)) (btm-bin (seq:elt srtd (fx- (seq:size srtd) 1))) ) ;when no distinction then no mode: go w/ the bottom (if (fx= (cdr top-bin) (cdr btm-bin)) (car btm-bin) (car top-bin)) ) ) ) ;; ;seq => arithmetic harmonic geometric (define (arithmetic-mean runs) (receive (m h g) (mean runs) m)) ;; ;; Computes the p-quantile for a set of numbers @runs (also known as the ;; "inverse cumulative distribution""). `p` is a real number between 0 and ;; 1.0. For instance, the 0.5-quantile corresponds to the median. (define (*quantile runs p) (if (seq:empty? runs) +nan.0 ;ceiling np must be valid index, so last index, not size (let ((np (* p (fx- (seq:size runs) 1))) (ordered (seq:sort runs <)) ) (arithmetic-mean (seq:sequence runs (seq:elt ordered (inexact->exact (floor np))) (seq:elt ordered (inexact->exact (ceiling np))))) ) ) ) (define (quantile runs p) (assert (and (real? p) (<= 0 p 1)) 'quantile "not in [0 .. 1]" p) (*quantile runs p) ) (define (percentile runs #!optional (p 95)) (assert (and (real? p) (<= 0 p 100)) 'percentile "not in [0 .. 100]" p) (*quantile runs (/ p 100)) ) ;seq => Q(.75) - Q(.25) (define (interquartile-range runs) (- (*quantile runs 3/4) (*quantile runs 1/4))) ;seq => Q(0.0) Q(.25) Q(.50) Q(.75) Q(1.0) (define (five-number-summary runs) (values (*quantile runs 0) (*quantile runs 1/4) (*quantile runs 1/2) (*quantile runs 3/4) (*quantile runs 1)) ) ;; ;seq => sigma sigma^2 (define (standard-deviation runs #!optional (bias #t)) (let* ((var (variance runs bias)) (sd (sqrt var)) ) (values (if (exact? var) (inexact->exact sd) sd) var)) ) (define (stddev runs #!optional (bias #t)) (receive (sd var) (standard-deviation runs bias) sd) ) ;; Computes the average absolute difference between the numbers in list @runs and ;; (median @runs). (define (absolute-deviation runs) (let ((siz (seq:size runs))) (if (zero? siz) +nan.0 (let ((m (arithmetic-mean runs))) (max 0 (/ (seq:foldl (lambda (acc elm) (+ acc (abs (- elm m)))) runs) siz)))) ) ) ;; Computes the variance for a set of numbers @runs, optionally applying ;; bias correction. @runs is a proper list of numeric values. Bias correction gets ;; enabled by setting `bias` to `#t`. Alternatively, it is possible to provide a ;; a positive integer, which is used instead of the number of elements in @runs. (define (variance runs #!optional (bias #t)) (let ((m (arithmetic-mean runs)) (siz (seq:size runs)) ) (let ((acc (seq:foldl (lambda (acc elm) (+ acc (expt (- elm m) 2))) 0 runs))) (/ acc (if (integer? bias) bias (- siz (if bias 1 0)))) ) ) ) ;; ;; Computes the skewness for a set of numbers @runs, optionally applying ;; bias correction. @runs is a proper list of numeric values. Bias correction gets ;; enabled by setting `bias` to `#t`. Alternatively, it is possible to provide a ;; a positive integer, which is used instead of the number of elements in @runs. (define (skewness runs #!optional (bias #t)) (let ((n (seq:size runs)) (m (arithmetic-mean runs)) (s (stddev runs bias)) ) (* (/ (seq:foldl (lambda (z x) (+ z (/ (expt (- x m) 3) n))) 0 runs) (expt s 3)) (if bias (let ((n (if (integer? bias) bias n))) (/ (sqrt (* n (- n 1))) (- n 2))) 1)) ) ) ;; Computes the kurtosis for a set of numbers @runs, optionally applying ;; bias correction. @runs is a proper list of numeric values. Bias correction gets ;; enabled by setting `bias` to `#t`. Alternatively, it is possible to provide a ;; a positive integer, which is used instead of the number of elements in @runs. (define (kurtosis runs #!optional (bias #t)) (let ((n (seq:size runs)) (m (arithmetic-mean runs)) (s (stddev runs bias))) (/ (seq:foldl (lambda (z x) (+ z (expt (- x m) 4))) 0 runs) (* (expt s 4) (if (integer? bias) bias (if bias (- n 1) n)))) ) ) ;; ;; Computes the covariance of two sets of numbers @runs1 and @runs2. Both @runs1 and ;; @runs2 are sequences of numbers. Bias correction can be enabled by setting ;; `bias` to `#t`. Alternatively, it is possible to provide a positive integer, ;; which is used instead of the number of elements in @runs1. (define (covariance runs1 runs2 #!optional (bias #t)) (let ((n (seq:size runs1))) (assert (= n (seq:size runs2))) (let ((xm (arithmetic-mean runs1)) (ym (arithmetic-mean runs2))) ;FIXME need 2 seq foldl, coercion expensive (/ (fold (lambda (x y z) (+ z (* (- x xm) (- y ym)))) 0 (seq:->list runs1) (seq:->list runs2)) (if (integer? bias) bias (- n (if bias 1 0)))) ) ) ) ;; Computes the correlation of two sets of numbers @runs1 and @runs2 in form of ;; the Pearson product-moment correlation coefficient_. Both @runs1 and ;; @runs2 are sequences of numbers. Bias correction can be enabled by setting ;; `bias` to `#t`. Alternatively, it is possible to provide a positive integer, ;; which is used instead of the number of elements in @runs1. (define (correlation runs1 runs2 #!optional (bias #t)) (/ (covariance runs1 runs2 bias) (* (stddev runs1 bias) (stddev runs2 bias))) ) ;; (: chi-square-component (real real -> real)) (: yates-chi-square-component (real real -> real)) (define (yates-chi-square-component o e) (let ((r (- (abs (- o e)) 1/2))) (/ (* r r) e) ) ) (define (chi-square-component o e) (let ((r (- o e))) (/ (* r r) e) ) ) (define (chi-components component observed expected) (define (xpct-elt-comp s i) (component (seq:elt s i) (seq:elt expected (seq:index i))) ) (if (seq:sequence? expected) (seq:smap* #() xpct-elt-comp observed) (seq:smap #() (cut component <> expected) observed) ) ) (define (chi-square observed expected #!optional yates?) ; (assert (seq:sequence? observed) 'chi-square "observed not a sequence" observed expected yates?) (assert (or (seq:sequence? expected) (real? expected)) 'chi-square "expected not a sequence or real" observed expected yates?) (when (and (seq:sequence? observed) (seq:sequence? expected)) (assert (= (seq:size expected) (seq:size observed)) 'chi-square "length observed not same length expected" observed expected yates?) ) ; (let ((component (if yates? yates-chi-square-component chi-square-component))) (seq:reduce + 0 (chi-components component observed expected)) ) ) ) ;module micro-stats