;;; libsvm software wrapped for Chicken Scheme. ;;; Copyright (c) Peter Lane, 2010. ;;; This program is free software: you can redistribute it and/or modify ;;; it under the terms of the GNU General Public License as published by ;;; the Free Software Foundation, either version 3 of the License, or ;;; (at your option) any later version. ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; You should have received a copy of the GNU General Public License ;;; along with this program. If not, see . ;;; ------------------------------------------------------------------------------- (module libsvm (export C-SVC NU-SVC ONE-CLASS EPSILON-SVR NU-SVR LINEAR POLY RBF SIGMOID PRECOMPUTED make-problem make-svm-node make-svm-parameter problem-get-instance problem-get-instance-values problem-get-label problem-num-instances read-problem svm-no-print-function svm-train svm-save-model svm-load-model svm-get-svm-type svm-get-nr-class svm-get-svr-probability svm-predict svm-free-model-content svm-destroy-model svm-destroy-param svm-check-parameter svm-check-probability-model ) (import chicken extras foreign scheme) #> #include "svm.h" <# ;; svm types (define C-SVC 0) (define NU-SVC 1) (define ONE-CLASS 2) (define EPSILON-SVR 3) (define NU-SVR 4) ;; kernel types (define LINEAR 0) (define POLY 1) (define RBF 2) (define SIGMOID 3) (define PRECOMPUTED 4) ;; problem definition should be a list of lists, ;; each sublist defining an instance, in one of following formats: ;; - label feature-value-0 feature-value-1 ... ;; - label (index value) (index value) ... (TODO) (define (make-problem definition) (unless (and (list? definition) (> (length definition) 0) ; must be at least one instance (list? (car definition)) (> (length (car definition)) 1) ; instance must have a label and at least one feature ) (error "make-problem: invalid definition")) (if (list? (cadar definition)) ; handle sparse vector definition (let* ((max-feature-index (apply max (map car (apply append (map cdr definition))))) (problem (create-svm-problem (length definition) max-feature-index)) (i 0)) (for-each (lambda (instance) (problem-set-label! problem i (exact->inexact (car instance))) (let ((j 0)) ; to record the end value position (for-each (lambda (feature-defn) (problem-set-value! problem i (car feature-defn) (exact->inexact (cadr feature-defn))) (set! j (+ 1 j))) (cdr instance)) (problem-set-end-value! problem i j)) (set! i (+ 1 i))) definition) problem) (let ((problem (create-svm-problem (length definition) (- (length (car definition)) 1))) (i 0)) (for-each (lambda (instance) (problem-set-label! problem i (exact->inexact (car instance))) (let ((j 0)) (for-each (lambda (value) (problem-set-value! problem i j (exact->inexact value)) (set! j (+ 1 j))) (cdr instance)) (problem-set-end-value! problem i j)) (set! i (+ 1 i))) definition) problem))) (define create-svm-problem (foreign-lambda* (c-pointer (struct "svm_problem")) ((int num_instances) (int num_features)) "int i; struct svm_problem * problem = malloc(sizeof(struct svm_problem)); problem->l = num_instances; problem->x = malloc(sizeof(struct svm_node *) * num_instances); struct svm_node * x_space; x_space = malloc(sizeof(struct svm_node) * (1 + num_features) * num_instances); for (i = 0; i < problem->l; ++i) { problem->x[i] = &x_space[i * (1 + num_features)]; } problem->y = malloc(sizeof(double) * num_instances); C_return(problem);")) (define problem-set-label! (foreign-lambda* void (((c-pointer (struct "svm_problem")) problem) (int index) (double value)) "problem->y[index] = value;")) (define problem-set-value! (foreign-lambda* void (((c-pointer (struct "svm_problem")) problem) (int index) (int feature) (double value)) "problem->x[index][feature].index = feature; problem->x[index][feature].value = value;")) (define problem-set-end-value! (foreign-lambda* void (((c-pointer (struct "svm_problem")) problem) (int index) (int feature)) "problem->x[index][feature].index = -1; problem->x[index][feature].value = 0.0;")) (define make-svm-node (foreign-lambda* (c-pointer (struct "svm_node")) ((int index) (double value)) "struct svm_node * node = malloc(sizeof(struct svm_node)); node->index = index; node->value = value; C_return(node);" )) ;; returns a c-pointer to a svm_parameter (define (make-svm-parameter #!key (svm-type C-SVC) (kernel-type LINEAR) (degree 3) ; for poly (gamma 0.0) ; for poly/rbf/sigmoid (coef0 0.0) ; for poly/sigmoid ;; these are for training only (cache-size 100.0) ; in MB (eps 0.001) ; stopping criteria (C 1.0) ; for C_SVC, EPSILON_SVR and NU_SVR (nr-weight 0) ; for C_SVC ; int *weight_label; for C_SVC ;; ?? ; double* weight; for C_SVC ;; ?? (nu 0.5) ; for NU_SVC, ONE_CLASS, and NU_SVR (p 0.1) ; for EPSILON_SVR (shrinking 1) ; use the shrinking heuristics (probability 0) ; do probability estimates ) (build-parameter svm-type kernel-type degree gamma coef0 cache-size eps C nr-weight nu p shrinking probability)) (define build-parameter (foreign-lambda* (c-pointer (struct "svm_parameter")) ((int svm_type) (int kernel_type) (int degree) (double gamma) (double coef0) (double cache_size) (double eps) (double C) (int nr_weight) (double nu) (double p) (int shrinking) (int probability)) "struct svm_parameter * param = malloc(sizeof(struct svm_parameter)); param->svm_type = svm_type; param->kernel_type = kernel_type; param->degree = degree; param->gamma = gamma; param->coef0 = coef0; param->cache_size = cache_size; param->eps = eps; param->C = C; param->nr_weight = nr_weight; param->weight_label = NULL; param->weight = NULL; param->nu = nu; param->p = p; param->shrinking = shrinking; param->probability = probability; C_return(param);")) ;; ---------------------------------------------------------------------------- ;; problems (foreign-declare " static char* readline(FILE *input) { int len, max_line_len = 1024; char * line = malloc(sizeof(char) * max_line_len); if(fgets(line,max_line_len,input) == NULL) return NULL; while(strrchr(line,'\\n') == NULL) { max_line_len *= 2; line = (char *) realloc(line,max_line_len); len = (int) strlen(line); if(fgets(line+len,max_line_len-len,input) == NULL) break; } C_return(line); }") (define problem-num-instances (foreign-lambda* integer (((c-pointer (struct svm_problem)) prob)) "int size = prob->l; C_return(size);")) ;; uses read-problem from svm-train.c ;; -- reads in a problem in svmlight format and returns a pointer to the problem (define read-problem (foreign-lambda* c-pointer ((c-string filename)) " struct svm_problem * prob = malloc(sizeof(struct svm_problem)); struct svm_node * x_space; int elements, max_index, inst_max_index, i, j; FILE *fp = fopen(filename,\"r\"); char *endptr; char *idx, *val, *label; if(fp == NULL) { fprintf(stderr,\"can't open input file %s\\n\",filename); exit(1); } prob->l = 0; elements = 0; int max_line_len = 1024; char * line = malloc(sizeof(char) * max_line_len); while((line = readline(fp)) != NULL) { char *p = strtok(line,\" \\t\"); // label // features while(1) { p = strtok(NULL,\" \\t\"); if(p == NULL || *p == '\\n') // check '\\n' as ' ' may be after the last feature break; ++elements; } ++elements; ++prob->l; } rewind(fp); prob->y = malloc(sizeof(double) * prob->l); prob->x = malloc(sizeof(struct svm_node *) * prob->l); x_space = malloc(sizeof(struct svm_node) * elements); max_index = 0; j=0; for(i=0; i < prob->l; i++) { inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has start from 0 line = readline(fp); prob->x[i] = &x_space[j]; label = strtok(line,\" \\t\"); prob->y[i] = strtod(label,&endptr); if(endptr == label) exit(1); while(1) { idx = strtok(NULL,\":\"); val = strtok(NULL,\" \\t\"); if(val == NULL) break; x_space[j].index = (int) strtol(idx,&endptr,10); if(endptr == idx || *endptr != '\\0' || x_space[j].index <= inst_max_index) exit(1); else inst_max_index = x_space[j].index; x_space[j].value = strtod(val,&endptr); if(endptr == val || (*endptr != '\\0' && !isspace(*endptr))) exit(1); ++j; } if(inst_max_index > max_index) max_index = inst_max_index; x_space[j++].index = -1; } fclose(fp); C_return(prob); " )) ;; access parts of problem (define problem-get-instance (foreign-lambda* (c-pointer (struct "svm_node")) (((c-pointer (struct "svm_problem")) problem) (int index)) "C_return(problem->x[index]);")) (define instance-index (foreign-lambda* int (((c-pointer (struct "svm_node")) instance) (int index)) "C_return(instance[index].index);")) (define instance-value (foreign-lambda* double (((c-pointer (struct "svm_node")) instance) (int index)) "C_return(instance[index].value);")) (define (problem-get-instance-values instance) (let loop ((i 0) (result '())) (if (= -1 (instance-index instance i)) (reverse result) (loop (+ 1 i) (cons (list (instance-index instance i) (instance-value instance i)) result))))) (define problem-get-label (foreign-lambda* int (((c-pointer (struct "svm_problem")) problem) (int index)) "C_return(problem->y[index]);")) ;; functions from svm.cpp (foreign-declare "void print_null(const char *s) {}") ;; clear the print function, so training is done 'quietly' (define svm-no-print-function (foreign-lambda* void () "svm_set_print_string_function(&print_null);")) ;; input: const struct svm_problem *, const struct svm_parameter * ;; output: struct svm_model * (define svm-train (foreign-lambda (c-pointer (struct "svm_model")) "svm_train" (c-pointer (struct "svm_problem")) (c-pointer (struct "svm_parameter")))) ;; input: const struct svm_problem *, const struct svm_parameter *, int nr_fold, double * target ;; output: void ; (define svm-cross-validation ; (foreign-lambda void "svm_cross_validation" c-pointer c-pointer int double-pointer)) ;; input: const char * file_name, const struct svm_model * ;; output: int (define svm-save-model (foreign-lambda integer "svm_save_model" c-string c-pointer)) ;; input: const char * file_name ;; output: struct svm_model * (define svm-load-model (foreign-lambda c-pointer "svm_load_model" c-string)) ;; input: const struct svm_model * ;; output: int (define svm-get-svm-type (foreign-lambda integer "svm_get_svm_type" c-pointer)) ;; input: const struct svm_model * ;; output: int (define svm-get-nr-class (foreign-lambda integer "svm_get_nr_class" c-pointer)) ;; collects labels within model into output variable ;; input: const struct svm_model *, int * label ;; output: void ; (define svm-get-labels ; (foreign-lambda void "svm_get_labels" c-pointer int-pointer)) ;; input: const struct svm_model * ;; output: double (define svm-get-svr-probability (foreign-lambda double "svm_get_svr_probability" c-pointer)) ;; input: const struct svm_model *, const struct svm_node *, double * dec_values ;; output: double ; (define svm-predict-values ; (foreign-lambda double "svm_predict_values" c-pointer c-pointer double-pointer)) ;; input: const struct svm_model *, const struct svm_node * ;; output: double (define svm-predict (foreign-lambda double "svm_predict" (c-pointer (struct "svm_model")) (c-pointer (struct "svm_node")))) ;; input: const struct svm_model *, const struct svm_node *, double * prob_estimates ;; output: double ; (define svm-predict-probability ; (foreign-lambda double "svm_predict_probability" c-pointer c-pointer double-pointer)) ;; input: struct svm_model * ;; output: void (define svm-free-model-content (foreign-lambda void "svm_free_model_content" c-pointer)) ;; input: struct svm_model * ;; output: void (define svm-destroy-model (foreign-lambda void "svm_destroy_model" c-pointer)) ;; input: struct svm_parameter * ;; output: void (define svm-destroy-param (foreign-lambda void "svm_destroy_param" c-pointer)) ;; input: const struct svm_problem *, const struct svm_parameter * ;; output: c-string (define svm-check-parameter (foreign-lambda c-string "svm_check_parameter" c-pointer c-pointer)) ;; input: const struct svm_model * ;; output: int (define svm-check-probability-model (foreign-lambda integer "svm_check_probability_model" c-pointer)) )