;;;; ;;Issues ;; ;;- see the haskell benchmark suite (? name ?) ;; ;;- missing `nice` argument checking (module micro-stats (;export ; statistics-sets statistics-set basic-statistics generate-statistics generate-statistics-alist ; standard-deviation* variance standard-deviation absolute-deviation mean* arithmetic-mean harmonic-mean geometric-mean mean median mode quantile percentile interquartile-range five-number-summary ;#| ;FIXME need to validate kurtosis skewness covariance correlation chi-square ;|# ) (import scheme (chicken base) (chicken type) (chicken syntax) (chicken fixnum) (chicken flonum) (chicken foreign) (only (chicken format) format) (only (srfi 1) first list-copy every fold) (prefix sequences seq:) (prefix (sequences utils misc) seq:) (prefix (sequences utils sort) seq:) (prefix (sequences utils sample) seq:)) (include-relative "micro-stats-types") (define-type basic-stats-vector (vector real real real real real)) (: statistics-sets (#!optional alist -> alist)) (: statistics-set (stats-set-id -> alist)) (: basic-statistics (seq -> basic-stats-vector)) (: generate-statistics (seq #!optional (or boolean symbol) -> (or false stats-alist))) (: generate-statistics-alist (seq (or false list) #!optional basic-stats-vector -> (or false stats-alist))) (: variance (seq #!optional (or boolean integer) -> real)) (: standard-deviation* (seq #!optional (or boolean integer) -> real real)) (: standard-deviation (seq #!optional (or boolean integer) -> real)) (: absolute-deviation (seq -> real)) (: mean* (seq -> real real real)) (: arithmetic-mean (seq -> real)) (: harmonic-mean (seq -> real)) (: geometric-mean (seq -> real)) (: mean (seq -> real)) (: mode (seq -> real)) (: median (seq -> real)) (: quantile (seq real -> real)) (: percentile (seq #!optional real -> real)) (: interquartile-range (seq -> real)) (: five-number-summary (seq -> real real real real real)) (: kurtosis (seq #!optional (or boolean integer) -> real)) (: skewness (seq #!optional (or boolean integer) -> real)) (: covariance (seq seq #!optional (or boolean integer) -> real)) (: correlation (seq seq #!optional (or boolean integer) -> real)) (: chi-square (seq (or real seq) #!optional boolean -> real)) ;;; ;from miscmacros (define-syntax define-parameter (syntax-rules () ((define-parameter name value guard) (define name (make-parameter value guard))) ((define-parameter name value) (define name (make-parameter value))) ((define-parameter name) (define name (make-parameter (void)))))) ;from moremacros (import-for-syntax (only (chicken base) symbol-append)) (define-syntax checked-guard (er-macro-transformer (lambda (frm rnm cmp) (##sys#check-syntax 'checked-guard frm '(_ symbol symbol . _)) (let ((_lambda (rnm 'lambda)) (_let (rnm 'let)) (_arg (rnm 'arg)) (?locnam (cadr frm)) (?typnam (caddr frm)) (?body (cdddr frm)) ) (let ((chknam (symbol-append 'check- (strip-syntax ?typnam)))) ;inject `(,_lambda (,_arg) (,chknam ',?locnam ,_arg) (,_let ((obj ,_arg)) ,@?body obj ) ) ) ) ) ) ) (define-syntax define-checked-parameter (syntax-rules () ((define-checked-parameter ?name ?init ?typnam ?body0 ...) (define-parameter ?name ?init (checked-guard ?name ?typnam ?body0 ...)) ) ) ) ;from check-errors (define (make-bad-argument-type-message #!optional argnam) (if (not argnam) "bad argument type" (format #f "bad `~A' argument type" argnam)) ) (define (make-type-name-tobe-message typnam) (format #f "not a ~A" typnam) ) (define (make-error-type-message typnam #!optional argnam) (string-append (make-bad-argument-type-message argnam) " - " (make-type-name-tobe-message typnam)) ) (define (alist? obj) (and (list? obj) (every pair? obj))) (define (error-alist loc obj #!optional argnam) (import (only (chicken string) conc)) (##sys#signal-hook #:type-error loc (make-error-type-message "alist" argnam) obj) ) (define (check-alist loc obj . args) (unless (alist? obj) (error-alist loc obj (let ((tmp args)) (if (null? tmp) #f (car tmp)))) ) obj ) ;;; ;; ;let actual float in data taint the result (define VERY-SMALL-EXACT (inexact->exact minimum-flonum)) (define VERY-LARGE-EXACT (inexact->exact maximum-flonum)) (define NORMAL-STATISTICS '( size max min arithmetic-mean (mean . (arithmetic-mean)) standard-deviation (sd . (standard-deviation)) (sigma . (standard-deviation)) )) (define VERBOSE-STATISTICS (append NORMAL-STATISTICS '( (95th . (percentile 95)) harmonic-mean geometric-mean median mode variance (var . (variance)) (sigma^2 . (variance)) ))) (define FULL-STATISTICS (append VERBOSE-STATISTICS '( kurtosis skewness interquartile-range five-number-summary ))) (define INITIAL-STATISTICS-SETS `( (normal . ,NORMAL-STATISTICS) (verbose . ,VERBOSE-STATISTICS) (full . ,FULL-STATISTICS) )) (define-checked-parameter statistics-sets INITIAL-STATISTICS-SETS alist) (define (checked-statistics-set-id->symbol obj) (case obj ((#f) 'normal) ((#t) 'verbose) (else (if (memq obj '(normal verbose full)) obj (error 'statistics-set "not a valid stat-set-id" obj)))) ) (define (statistics-set stat-set-id) (let ((key (checked-statistics-set-id->symbol stat-set-id))) (alist-ref key (statistics-sets) eq?) ) ) (define-constant ARITHMETIC-IDX 0) (define-constant HARMONIC-IDX 1) (define-constant GEOMETRIC-IDX 2) (define-constant SIGMA-IDX 3) (define-constant SIGMA^2-IDX 4) (define (normal-stats runs) (vector (arithmetic-mean runs) +nan.0 +nan.0 (standard-deviation runs) +nan.0) ) (define (verbose-stats runs) (let*-values (((m h g) (mean* runs)) ((s v) (standard-deviation* runs m)) ) (vector m h g s v) ) ) (define (needs-verbose-stats? funcs) (define (set-has-verbose-item! flags func) (let* ((parameterized? (pair? func)) (func-key (if parameterized? (car func) func)) (func-name (if parameterized? (cadr func) func-key)) ) (case func-name ((harmonic-mean) (vector-set! flags 0 #t)) ((geometric-mean) (vector-set! flags 1 #t)) ((variance) (vector-set! flags 2 #t)) ) flags ) ) (let ((flags (foldl set-has-verbose-item! (vector #f #f #f) funcs))) (and (vector-ref flags 0) (vector-ref flags 1) (vector-ref flags 2)) ) ) (define (basic-stats runs funcs) ((if (needs-verbose-stats? funcs) verbose-stats normal-stats) runs) ) (define (gen-stats-item runs func statvec) (let* ((parameterized? (pair? func)) (func-key (if parameterized? (car func) func)) (func-name (if parameterized? (cadr func) func-key)) (func-params (if parameterized? (cddr func) '())) ) `(,func-key . ,(case func-name ((size) (seq:size runs) ) ((max) (seq:foldl max VERY-SMALL-EXACT runs) ) ((min) (seq:foldl min VERY-LARGE-EXACT runs) ) ((arithmetic-mean) (vector-ref statvec ARITHMETIC-IDX) ) ((standard-deviation) (vector-ref statvec SIGMA-IDX) ) ((percentile) (apply percentile runs func-params) ) ((harmonic-mean) (vector-ref statvec HARMONIC-IDX) ) ((geometric-mean) (vector-ref statvec GEOMETRIC-IDX) ) ((median) (median runs) ) ((mode) (mode runs) ) ((variance) (vector-ref statvec SIGMA^2-IDX) ) (else (error 'statistic "unknown function" func-name)))) ) ) ; (define basic-statistics verbose-stats) (define (generate-statistics runs #!optional stat-set-id) (generate-statistics-alist runs (statistics-set stat-set-id)) ) (define (generate-statistics-alist runs funcs #!optional statvec) (and (list? funcs) (map (cut gen-stats-item runs <> (or statvec (basic-stats runs funcs))) funcs) ) ) ;; (define (acc-update! acc i f) (set! (vector-ref acc i) (f (vector-ref acc i)))) (define (geometric-accum n) (let ((r (/ 1 n))) (lambda (acc run) (let ((x (expt run r))) (* acc (if (exact? run) (inexact->exact x) x)) ) ) ) ) (define ((harmonic-accum n) acc run) (if (zero? run) acc (+ acc (/ 1 run))) ) (define ((arithmetic-accum n) acc run) (+ acc (/ run n)) ) ;seq => arithmetic harmonic geometric (define (mean* runs) (let ((n (seq:size runs))) (if (fx= 0 n) (values +nan.0 +nan.0 +nan.0) (let* ((accums (vector 0 0 1)) (arit-acc (arithmetic-accum n)) (harm-acc (harmonic-accum n)) (geom-acc (geometric-accum n)) (accum! (lambda (accs run) ;minimize the acccumulated value (acc-update! accs ARITHMETIC-IDX (cut arit-acc <> run)) (acc-update! accs HARMONIC-IDX (cut harm-acc <> run)) (acc-update! accs GEOMETRIC-IDX (cut geom-acc <> run)) accs ) ) (accums (seq:foldl accum! accums runs)) ) (values (vector-ref accums ARITHMETIC-IDX) (let ((h (vector-ref accums HARMONIC-IDX))) (if (zero? h) h (/ n h))) (vector-ref accums GEOMETRIC-IDX) ) ) ) ) ) ;Returns the arithmetic mean of a set of numbers @runs. @runs is a ;sequence of numeric values. (define (arithmetic-mean runs) (seq:foldl (arithmetic-accum (seq:size runs)) 0 runs) ) (define (harmonic-mean runs) (let* ((n (seq:size runs)) (h (seq:foldl (harmonic-accum n) 0 runs)) ) (if (zero? h) h (/ n h)) ) ) (define (geometric-mean runs) (seq:foldl (geometric-accum (seq:size runs)) 1 runs) ) (define mean arithmetic-mean) ;Returns the median for a set of numbers @runs. (define (median runs) (if (seq:empty? runs) +nan.0 (let* ((ordered (seq:sort runs <)) (mid (- (/ (fx+ (seq:size ordered) 1) 2) 1)) ) (if (integer? mid) (seq:elt ordered mid)) (/ (+ (seq:elt ordered (floor mid)) (seq:elt ordered (ceiling mid))) 2) ) ) ) ;Returns the mode of a set of numbers @runs. The mode is the value that ;appears most often in @runs. @runs is a sequence of numeric values. ;{=} is used as the equality operator. (define (mode runs) (define (occur> a b) (> (cdr a) (cdr b))) (if (seq:empty? runs) +nan.0 (let* ((srtd (seq:sort! (seq:histogram runs =) occur>)) (top-bin (seq:elt srtd 0)) (btm-bin (seq:elt srtd (fx- (seq:size srtd) 1))) ) ;when no distinction then no mode: go w/ the bottom (if (fx= (cdr top-bin) (cdr btm-bin)) (car btm-bin) (car top-bin)) ) ) ) ;; (define (*quantile runs p) (if (seq:empty? runs) +nan.0 ;ceiling np must be valid index, so last index, not size (let ((np (* p (fx- (seq:size runs) 1))) (ordered (seq:sort runs <)) ) (arithmetic-mean (seq:sequence runs (seq:elt ordered (inexact->exact (floor np))) (seq:elt ordered (inexact->exact (ceiling np))))) ) ) ) ;Returns the p-quantile for a set of numbers @runs (also known as the ;"inverse cumulative distribution""). @p is a real number between 0 and ;1.0. For instance, the 0.5-quantile corresponds to the median. (define (quantile runs p) (assert (and (real? p) (<= 0 p 1)) 'quantile "not in [0 .. 1]" p) (*quantile runs p) ) ;Returns the percentile for a set of numbers @runs and a given percentage ;@pct. @pct is a number between 0 and 100. For instance, the 90th ;percentile corresponds to the 0.9-quantile. (define (percentile runs #!optional (pct 95)) (assert (and (real? pct) (<= 0 pct 100)) 'percentile "not in [0 .. 100]" pct) (*quantile runs (/ pct 100)) ) ;Returns the interquartile range for a given set of numbers @runs. @runs ;is a proper list of numeric values. The interquartile range is the ;difference between the 0.75-quantile and the 0.25-quantile. (define (interquartile-range runs) (- (*quantile runs 3/4) (*quantile runs 1/4))) ;Returns a list of 5 statistics describing the set of numbers @runs: the ;minimum value, the lower quartile, the median, the upper quartile, and ;the maximum value. (define (five-number-summary runs) (values (*quantile runs 0) (*quantile runs 1/4) (*quantile runs 1/2) (*quantile runs 3/4) (*quantile runs 1)) ) ;; ;NOTE About Data Input Form ; ;All @runs* are a {sequence} of numbers. ;NOTE About Bias Correction ; ;Bias correction gets disabled by setting @bias to {#f}. Alternatively, ;it is possible to provide a positive integer, which is used instead of ;the number of elements. ;Returns the variance for a set of numbers @runs, optionally modifying ;the bias correction. (define (variance runs #!optional (bias #t)) (let ((siz (seq:size runs))) (if (<= 0 siz 1) 0 (let* ((m (arithmetic-mean runs)) (acc (seq:foldl (lambda (acc elm) (+ acc (expt (- elm m) 2))) 0 runs)) ) (/ acc (if (integer? bias) bias (- siz (if bias 1 0)))) ) ) ) ) ;seq => sd var (define (standard-deviation* runs #!optional (bias #t)) (let* ((var (variance runs bias)) (sd (sqrt var)) ) (values (if (exact? var) (inexact->exact sd) sd) var)) ) ;Returns the standard deviation for a set of numbers @runs, optionally ;modifying the bias correction. (define (standard-deviation runs #!optional (bias #t)) (receive (sd var) (standard-deviation* runs bias) sd) ) ;Returns the average absolute difference between the set of numbers ;@runs and it's median. (define (absolute-deviation runs) (let ((siz (seq:size runs))) (if (zero? siz) +nan.0 (let ((m (arithmetic-mean runs))) (max 0 (/ (seq:foldl (lambda (acc elm) (+ acc (abs (- elm m)))) runs) siz)))) ) ) ;; ;Returns the skewness for a set of numbers @runs, optionally modifying ;the bias correction. (define (skewness runs #!optional (bias #t)) (let ((n (seq:size runs)) (m (arithmetic-mean runs)) (s (standard-deviation runs bias)) ) (* (/ (seq:foldl (lambda (z x) (+ z (/ (expt (- x m) 3) n))) 0 runs) (expt s 3)) (if bias (let ((n (if (integer? bias) bias n))) (/ (sqrt (* n (- n 1))) (- n 2))) 1)) ) ) ;Returns the kurtosis for a set of numbers @runs, optionally modifying ;the bias correction. (define (kurtosis runs #!optional (bias #t)) (let ((n (seq:size runs)) (m (arithmetic-mean runs)) (s (standard-deviation runs bias))) (/ (seq:foldl (lambda (z x) (+ z (expt (- x m) 4))) 0 runs) (* (expt s 4) (if (integer? bias) bias (if bias (- n 1) n)))) ) ) ;; ;Returns the covariance of two sets of numbers @runs1 and @runs2, ;optionally modifying the bias correction. (define (covariance runs1 runs2 #!optional (bias #t)) (let ((n (seq:size runs1))) (assert (= n (seq:size runs2)) 'covariance "runs must be same size" n (seq:size runs2)) (let ((xm (arithmetic-mean runs1)) (ym (arithmetic-mean runs2))) ;FIXME need 2 seq foldl, coercion expensive (/ (fold (lambda (x y z) (+ z (* (- x xm) (- y ym)))) 0 (seq:->list runs1) (seq:->list runs2)) (if (integer? bias) bias (- n (if bias 1 0)))) ) ) ) ;Returns the correlation of two sets of numbers, @runs1 and @runs2, in ;the form of the Pearson product-moment correlation coefficient, ;optionally modifying the bias correction. (define (correlation runs1 runs2 #!optional (bias #t)) (/ (covariance runs1 runs2 bias) (* (standard-deviation runs1 bias) (standard-deviation runs2 bias))) ) ;; (define (yates-chi-square-component o e) (let ((r (- (abs (- o e)) 1/2))) (/ (* r r) e) ) ) (define (pearson-chi-square-component o e) (let ((r (- o e))) (/ (* r r) e) ) ) (define (chi-components component observed expected) (define (xpct-elt-comp s i) (component (seq:elt s i) (seq:elt expected (seq:index i))) ) (if (seq:sequence? expected) (seq:smap* #() xpct-elt-comp observed) (seq:smap #() (cut component <> expected) observed) ) ) ; ;Returns the chi-square of two sets of numbers, @observed and @expected, ;using the Pearson component. The Yates's correction component gets used ;by setting @yates? to {#t}. (define (chi-square observed expected #!optional yates?) ; (assert (seq:sequence? observed) 'chi-square "observed not a sequence" observed expected yates?) (assert (or (seq:sequence? expected) (real? expected)) 'chi-square "expected not a sequence or real" observed expected yates?) (when (and (seq:sequence? observed) (seq:sequence? expected)) (assert (= (seq:size expected) (seq:size observed)) 'chi-square "length observed not same length expected" observed expected yates?) ) ; (let ((component (if yates? yates-chi-square-component pearson-chi-square-component))) (seq:reduce + 0 (chi-components component observed expected)) ) ) ) ;module micro-stats