;;;; ;;Issues ;; ;;- see the haskell benchmark suite (? name ?) (module micro-stats (;export statistics-sets statistics-set basic-statistics generate-statistics generate-statistics-alist percentile mean median mode standard-deviation chi-square chi-squares) (import scheme (chicken base) (chicken type) (chicken syntax) (chicken flonum) (chicken foreign) (only (srfi 1) first list-copy) (prefix sequences seq:) (prefix seq-utils seq*:)) ;;; (include "micro-stats-types") (: statistics-sets (#!optional alist -> alist)) (: statistics-set (statistics-set-id -> alist)) (: basic-statistics (seq --> (vector-of real))) (: generate-statistics (seq #!optional (or symbol boolean) --> (or boolean statistics-alist))) (: generate-statistics-alist (seq (or boolean list) #!optional (vector-of real) --> (or boolean statistics-alist))) (: generate-statistics-sublist (seq list #!optional vector --> (pair symbol *))) (: variance (seq real --> real)) (: standard-deviation (seq #!optional real --> real real)) (: arithmetic-mean (seq --> real)) (: mode (seq #!optional binary-predicate --> real)) (: median (seq #!optional binary-predicate --> real)) (: mean (seq --> real real real)) (: percentile (seq #!optional real binary-predicate --> real)) (: chi-sample (seq (or real seq) --> real)) (: chi-squares (seq seq --> seq)) (: chi-square (seq (or real seq) --> real)) ;;; ;from miscmacros (define-syntax define-parameter (syntax-rules () ((define-parameter name value guard) (define name (make-parameter value guard))) ((define-parameter name value) (define name (make-parameter value))) ((define-parameter name) (define name (make-parameter (void)))))) ;from moremacros (import-for-syntax (only (chicken base) symbol-append)) (define-syntax checked-guard (er-macro-transformer (lambda (frm rnm cmp) (##sys#check-syntax 'checked-guard frm '(_ symbol symbol . _)) (let ( (?locnam (cadr frm)) (?typnam (caddr frm)) (?body (cdddr frm)) (_lambda (rnm 'lambda)) ) `(,_lambda (obj) (,(symbol-append 'check- (strip-syntax ?typnam)) ',?locnam obj) ,@?body obj ) ) ) ) ) (define-syntax define-checked-parameter (syntax-rules () ((define-checked-parameter ?name ?init ?typnam ?body0 ...) (define-parameter ?name ?init (checked-guard ?name ?typnam ?body0 ...)) ) ) ) ;from check-errors (define (alist? obj) (or (null? obj) (and (pair? obj) (let loop ((ls obj)) (or (null? ls) (and ;since anything can be a key no stronger check possible (pair? (car ls)) (loop (cdr ls) ) ) ) ) ) ) ) (define (check-alist loc obj . args) (unless (alist? obj) (error-alist loc obj (let ((tmp args)) (if (null? tmp) #f (car tmp)))) ) obj ) (define (error-alist loc obj #!optional argnam) (import (only (chicken string) conc)) (##sys#signal-hook #:type-error loc (string-append (if (not argnam) "bad argument" (conc "bad `" argnam "' argument")) " type - not " "an " "alist") obj) ) ;;; ;; (define *empty-vector* (vector)) (define NORMAL-STATISTICS '( size max min arithmetic-mean (mean . (arithmetic-mean)) standard-deviation (sd . (standard-deviation)) (sigma . (standard-deviation)) )) (define VERBOSE-STATISTICS (append NORMAL-STATISTICS '( (95th . (percentile 95)) harmonic-mean geometric-mean median mode variance (var . (variance)) (sigma^2 . (variance))) )) (define INITIAL-STATISTICS-SETS `( (normal . ,NORMAL-STATISTICS) (verbose . ,VERBOSE-STATISTICS) )) (define-checked-parameter statistics-sets INITIAL-STATISTICS-SETS alist) (define (statistics-set stat-set-id) (and-let* ( (stat-set-id (case stat-set-id ((#f) 'normal) ((#t) 'verbose) (else (if (symbol? stat-set-id) stat-set-id (error 'statistics-set "not a valid stat-set-id" stat-set-id))))) ) (alist-ref stat-set-id (statistics-sets) eq?) ) ) ;; (define-constant ARITHMETIC-IDX 0) (define-constant HARMONIC-IDX 1) (define-constant GEOMETRIC-IDX 2) (define-constant SIGMA-IDX 3) (define-constant SIGMA^2-IDX 4) ;; (define (basic-statistics runs) (let*-values ( ((m h g) (mean runs)) ((s v) (standard-deviation runs m)) ) (vector m h g s v) ) ) ;; (define (generate-statistics runs #!optional stat-set-id) (let ( (basics (basic-statistics runs)) (funcs (statistics-set stat-set-id)) ) (generate-statistics-alist runs funcs basics) ) ) (define (generate-statistics-alist runs funcs #!optional (basics (basic-statistics runs))) (and (list? funcs) (map (cut generate-statistics-sublist runs <> basics) funcs) ) ) (define (generate-statistics-sublist runs func #!optional (basics (basic-statistics runs))) (let* ( (parameterized? (pair? func)) (func-key (if parameterized? (car func) func)) (func-name (if parameterized? (cadr func) func-key)) (func-params (if parameterized? (cddr func) '())) ) `(,func-key . ,(case func-name ((size) (seq:size runs) ) ((max) (seq:foldl max minimum-flonum runs) ) ((min) (seq:foldl min maximum-flonum runs) ) ((arithmetic-mean) (vector-ref basics ARITHMETIC-IDX) ) ((standard-deviation) (vector-ref basics SIGMA-IDX) ) ((percentile) (apply percentile runs func-params) ) ((harmonic-mean) (vector-ref basics HARMONIC-IDX) ) ((geometric-mean) (vector-ref basics GEOMETRIC-IDX) ) ((median) (median runs) ) ((mode) (mode runs) ) ((variance) (vector-ref basics SIGMA^2-IDX) ) (else (error 'statistic "unknown function" func-name)))) ) ) ;; (define (percentile runs #!optional (p 95) (less? <)) (unless (and (real? p) (<= 0 p) (< p 100)) (error 'percentile' "not in [0 .. 100)" p) ) (if (seq:empty? runs) +nan.0 (let* ( (ordered (seq*:sort runs less?)) (size (seq:size ordered)) ;size-of vector (rank (round (* size (/ p 100)))) (idx (max 0 (- rank 1))) ) ;idx = 0 => min( max( ordered(idx) (seq:elt ordered idx) ) ) ) ;; (define (acc-update! acc i f) (set! (vector-ref acc i) (f (vector-ref acc i)))) ;seq => arithmetic harmonic geometric (define (mean runs) (let ((size (seq:size runs))) (if (zero? size) (values 0 0 0) (let* ( (n size) (r (/ 1 n)) (acc (vector 0 0 1)) (accum! (lambda (acc value) ;minimize the acccumulated value (acc-update! acc GEOMETRIC-IDX (lambda (run) (* run (expt value r)))) (acc-update! acc HARMONIC-IDX (lambda (run) (if (zero? value) run (+ run (/ 1 value))))) (acc-update! acc ARITHMETIC-IDX (lambda (run) (+ run (/ value n)))) acc ) ) (acc (seq:foldl accum! acc runs)) (harmonic-mean (let ((h (vector-ref acc HARMONIC-IDX))) (if (zero? h) 0 (/ n h)))) ) (values (vector-ref acc ARITHMETIC-IDX) harmonic-mean (vector-ref acc GEOMETRIC-IDX) ) ) ) ) ) ;; (define (median runs #!optional (less? <)) (let* ( (ordered (seq*:sort runs less?)) (size (seq:size ordered)) ;size-of vector (median (- (round (/ (add1 size) 2)) 1)) ) (cond ((seq:empty? runs) 0.0) ((exact? median) (seq:elt ordered median)) (else (let ((i (inexact->exact (floor median)))) (/ (+ (seq:elt ordered i) (seq:elt ordered (add1 i))) 2) ) ) ) ) ) ;; (define (mode runs #!optional (eqal? =)) (define (occur> a b) (> (cdr a) (cdr b))) (if (seq:empty? runs) 0.0 (let ((top-bin (first (seq*:sort! (seq*:histogram runs eqal?) occur>)))) (car top-bin)) ) ) ;; ;seq => arithmetic harmonic geometric (define (arithmetic-mean runs) (let-values (((m h g) (mean runs))) m)) ;; ;seq => sigma sigma^2 (define (standard-deviation runs #!optional (m (arithmetic-mean runs))) (let ((var (variance runs m))) (values (sqrt var) var)) ) ;; (define (variance runs m) (let ((acc 0) (siz 0)) (seq:for (lambda (elm) (set! acc (+ acc (expt (- elm m) 2))) (set! siz (add1 siz)) ) runs) (if (< siz 2) 0.0 (/ acc (sub1 siz))) ) ) ;; ;observed : (seq-of real) ;expected : (or real (seq-of real)) ; (define (chi-square observed expected) (seq:smap *empty-vector* (cut chi-sample <> expected) observed) ) ;samples : (seq-of observed) ;observed : (seq-of real) ;expected : (or real (seq-of real)) ; (define (chi-squares samples expected) (seq:smap *empty-vector* (cut chi-square <> expected) samples) ) ;; (define (component o e) (let ((r (- o e))) (/ (* r r) e))) ;observed : (seq-of real) ;expected : (or real (seq-of real)) ; (define (chi-sample observed expected) (let ( (components (if (seq:sequence? expected) (seq:smap* *empty-vector* (lambda (value it) (component value (seq:elt expected (seq:index it)))) observed) (seq:smap *empty-vector* (cut component <> expected) observed) ) ) ) (seq:foldl + 0 components) ) ) ) ;module micro-stats