;;; libsvm software wrapped for Chicken Scheme.
;;; Copyright (c) Peter Lane, 2010.
;;; This program is free software: you can redistribute it and/or modify
;;; it under the terms of the GNU General Public License as published by
;;; the Free Software Foundation, either version 3 of the License, or
;;; (at your option) any later version.
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see .
;;; -------------------------------------------------------------------------------
;; TODO:
;; 1. Error reporting/recovery
;; 2. Better handling for larger datasets
;; 3. Memory management - can we automate deleting of objects when scheme object gc-ed?
(module
libsvm
(export
C-SVC NU-SVC ONE-CLASS EPSILON-SVR NU-SVR
LINEAR POLY RBF SIGMOID PRECOMPUTED
make-problem
make-svm-node
make-svm-parameter
problem-get-instance
problem-get-instance-values
problem-get-label
problem-num-instances
read-problem
svm-no-print-function
svm-train
svm-save-model
svm-load-model
svm-get-svm-type
svm-get-nr-class
svm-get-svr-probability
svm-predict
svm-free-model-content
svm-destroy-model
svm-destroy-param
svm-check-parameter
svm-check-probability-model
)
(import chicken extras foreign scheme)
#>
#include "svm.h"
<#
;; svm types
(define C-SVC 0)
(define NU-SVC 1)
(define ONE-CLASS 2)
(define EPSILON-SVR 3)
(define NU-SVR 4)
;; kernel types
(define LINEAR 0)
(define POLY 1)
(define RBF 2)
(define SIGMOID 3)
(define PRECOMPUTED 4)
;; problem definition should be a list of lists,
;; each sublist defining an instance, in one of following formats:
;; - label feature-value-0 feature-value-1 ...
;; - label (index value) (index value) ...
(define (make-problem definition)
(unless (and (list? definition)
(> (length definition) 0) ; must be at least one instance
(list? (car definition))
(> (length (car definition)) 1) ; instance must have a label and at least one feature
)
(error "make-problem: invalid definition"))
(if (list? (cadar definition))
; handle sparse vector definition
(let* ((max-feature-index (apply max (map car (apply append (map cdr definition)))))
(problem (create-svm-problem (length definition) max-feature-index))
(i 0))
(for-each (lambda (instance)
(problem-set-label! problem i (exact->inexact (car instance)))
(let ((j 0)) ; to record the end value position
(for-each (lambda (feature-defn)
(problem-set-value! problem i (car feature-defn) (exact->inexact (cadr feature-defn)))
(set! j (+ 1 j)))
(cdr instance))
(problem-set-end-value! problem i j))
(set! i (+ 1 i)))
definition)
problem)
(let ((problem (create-svm-problem (length definition) (- (length (car definition)) 1)))
(i 0))
(for-each (lambda (instance)
(problem-set-label! problem i (exact->inexact (car instance)))
(let ((j 0))
(for-each (lambda (value)
(problem-set-value! problem i j (exact->inexact value))
(set! j (+ 1 j)))
(cdr instance))
(problem-set-end-value! problem i j))
(set! i (+ 1 i)))
definition)
problem)))
(define create-svm-problem
(foreign-lambda* (c-pointer (struct "svm_problem")) ((int num_instances) (int num_features))
"int i;
struct svm_problem * problem = malloc(sizeof(struct svm_problem));
problem->l = num_instances;
problem->x = malloc(sizeof(struct svm_node *) * num_instances);
struct svm_node * x_space;
x_space = malloc(sizeof(struct svm_node) * (1 + num_features) * num_instances);
for (i = 0; i < problem->l; ++i)
{
problem->x[i] = &x_space[i * (1 + num_features)];
}
problem->y = malloc(sizeof(double) * num_instances);
C_return(problem);"))
(define problem-set-label!
(foreign-lambda* void (((c-pointer (struct "svm_problem")) problem) (int index) (double value))
"problem->y[index] = value;"))
(define problem-set-value!
(foreign-lambda* void (((c-pointer (struct "svm_problem")) problem) (int index) (int feature) (double value))
"problem->x[index][feature].index = feature;
problem->x[index][feature].value = value;"))
(define problem-set-end-value!
(foreign-lambda* void (((c-pointer (struct "svm_problem")) problem) (int index) (int feature))
"problem->x[index][feature].index = -1;
problem->x[index][feature].value = 0.0;"))
(define make-svm-node
(foreign-lambda* (c-pointer (struct "svm_node")) ((int index) (double value))
"struct svm_node * node = malloc(sizeof(struct svm_node));
node->index = index;
node->value = value;
C_return(node);"
))
;; returns a c-pointer to a svm_parameter
(define (make-svm-parameter
#!key (svm-type C-SVC)
(kernel-type LINEAR)
(degree 3) ; for poly
(gamma 0.0) ; for poly/rbf/sigmoid
(coef0 0.0) ; for poly/sigmoid
;; these are for training only
(cache-size 100.0) ; in MB
(eps 0.001) ; stopping criteria
(C 1.0) ; for C_SVC, EPSILON_SVR and NU_SVR
(nr-weight 0) ; for C_SVC
; int *weight_label; for C_SVC ;; ??
; double* weight; for C_SVC ;; ??
(nu 0.5) ; for NU_SVC, ONE_CLASS, and NU_SVR
(p 0.1) ; for EPSILON_SVR
(shrinking 1) ; use the shrinking heuristics
(probability 0) ; do probability estimates
)
(build-parameter svm-type kernel-type degree gamma coef0 cache-size eps C nr-weight nu p shrinking probability))
(define build-parameter
(foreign-lambda* (c-pointer (struct "svm_parameter"))
((int svm_type)
(int kernel_type)
(int degree)
(double gamma)
(double coef0)
(double cache_size)
(double eps)
(double C)
(int nr_weight)
(double nu)
(double p)
(int shrinking)
(int probability))
"struct svm_parameter * param = malloc(sizeof(struct svm_parameter));
param->svm_type = svm_type;
param->kernel_type = kernel_type;
param->degree = degree;
param->gamma = gamma;
param->coef0 = coef0;
param->cache_size = cache_size;
param->eps = eps;
param->C = C;
param->nr_weight = nr_weight;
param->weight_label = NULL;
param->weight = NULL;
param->nu = nu;
param->p = p;
param->shrinking = shrinking;
param->probability = probability;
C_return(param);"))
;; ----------------------------------------------------------------------------
;; problems
(foreign-declare "
static char* readline(FILE *input)
{
int len, max_line_len = 1024;
char * line = malloc(sizeof(char) * max_line_len);
if(fgets(line,max_line_len,input) == NULL)
return NULL;
while(strrchr(line,'\\n') == NULL)
{
max_line_len *= 2;
line = (char *) realloc(line,max_line_len);
len = (int) strlen(line);
if(fgets(line+len,max_line_len-len,input) == NULL)
break;
}
C_return(line);
}")
(define problem-num-instances
(foreign-lambda* integer (((c-pointer (struct svm_problem)) prob))
"int size = prob->l;
C_return(size);"))
;; uses read-problem from svm-train.c
;; -- reads in a problem in svmlight format and returns a pointer to the problem
(define read-problem
(foreign-lambda* c-pointer ((c-string filename))
"
struct svm_problem * prob = malloc(sizeof(struct svm_problem));
struct svm_node * x_space;
int elements, max_index, inst_max_index, i, j;
FILE *fp = fopen(filename,\"r\");
char *endptr;
char *idx, *val, *label;
if(fp == NULL)
{
fprintf(stderr,\"can't open input file %s\\n\",filename);
exit(1);
}
prob->l = 0;
elements = 0;
int max_line_len = 1024;
char * line = malloc(sizeof(char) * max_line_len);
while((line = readline(fp)) != NULL)
{
char *p = strtok(line,\" \\t\"); // label
// features
while(1)
{
p = strtok(NULL,\" \\t\");
if(p == NULL || *p == '\\n') // check '\\n' as ' ' may be after the last feature
break;
++elements;
}
++elements;
++prob->l;
}
rewind(fp);
prob->y = malloc(sizeof(double) * prob->l);
prob->x = malloc(sizeof(struct svm_node *) * prob->l);
x_space = malloc(sizeof(struct svm_node) * elements);
max_index = 0;
j=0;
for(i=0; i < prob->l; i++)
{
inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has start from 0
line = readline(fp);
prob->x[i] = &x_space[j];
label = strtok(line,\" \\t\");
prob->y[i] = strtod(label,&endptr);
if(endptr == label)
exit(1);
while(1)
{
idx = strtok(NULL,\":\");
val = strtok(NULL,\" \\t\");
if(val == NULL)
break;
x_space[j].index = (int) strtol(idx,&endptr,10);
if(endptr == idx || *endptr != '\\0' || x_space[j].index <= inst_max_index)
exit(1);
else
inst_max_index = x_space[j].index;
x_space[j].value = strtod(val,&endptr);
if(endptr == val || (*endptr != '\\0' && !isspace(*endptr)))
exit(1);
++j;
}
if(inst_max_index > max_index)
max_index = inst_max_index;
x_space[j++].index = -1;
}
fclose(fp);
C_return(prob);
"
))
;; access parts of problem
(define problem-get-instance
(foreign-lambda* (c-pointer (struct "svm_node")) (((c-pointer (struct "svm_problem")) problem) (int index))
"C_return(problem->x[index]);"))
(define instance-index
(foreign-lambda* int (((c-pointer (struct "svm_node")) instance) (int index))
"C_return(instance[index].index);"))
(define instance-value
(foreign-lambda* double (((c-pointer (struct "svm_node")) instance) (int index))
"C_return(instance[index].value);"))
(define (problem-get-instance-values instance)
(let loop ((i 0)
(result '()))
(if (= -1 (instance-index instance i))
(reverse result)
(loop (+ 1 i)
(cons (list (instance-index instance i)
(instance-value instance i))
result)))))
(define problem-get-label
(foreign-lambda* int (((c-pointer (struct "svm_problem")) problem) (int index))
"C_return(problem->y[index]);"))
;; functions from svm.cpp
(foreign-declare "void print_null(const char *s) {}")
;; clear the print function, so training is done 'quietly'
(define svm-no-print-function
(foreign-lambda* void ()
"svm_set_print_string_function(&print_null);"))
;; input: const struct svm_problem *, const struct svm_parameter *
;; output: struct svm_model *
(define svm-train
(foreign-lambda (c-pointer (struct "svm_model"))
"svm_train"
(c-pointer (struct "svm_problem"))
(c-pointer (struct "svm_parameter"))))
;; input: const struct svm_problem *, const struct svm_parameter *, int nr_fold, double * target
;; output: void
; (define svm-cross-validation
; (foreign-lambda void "svm_cross_validation" c-pointer c-pointer int double-pointer))
;; input: const char * file_name, const struct svm_model *
;; output: int
(define svm-save-model
(foreign-lambda integer "svm_save_model" c-string c-pointer))
;; input: const char * file_name
;; output: struct svm_model *
(define svm-load-model
(foreign-lambda c-pointer "svm_load_model" c-string))
;; input: const struct svm_model *
;; output: int
(define svm-get-svm-type
(foreign-lambda integer "svm_get_svm_type" c-pointer))
;; input: const struct svm_model *
;; output: int
(define svm-get-nr-class
(foreign-lambda integer "svm_get_nr_class" c-pointer))
;; collects labels within model into output variable
;; input: const struct svm_model *, int * label
;; output: void
; (define svm-get-labels
; (foreign-lambda void "svm_get_labels" c-pointer int-pointer))
;; input: const struct svm_model *
;; output: double
(define svm-get-svr-probability
(foreign-lambda double "svm_get_svr_probability" c-pointer))
;; input: const struct svm_model *, const struct svm_node *, double * dec_values
;; output: double
; (define svm-predict-values
; (foreign-lambda double "svm_predict_values" c-pointer c-pointer double-pointer))
;; input: const struct svm_model *, const struct svm_node *
;; output: double
(define svm-predict
(foreign-lambda double "svm_predict" (c-pointer (struct "svm_model")) (c-pointer (struct "svm_node"))))
;; input: const struct svm_model *, const struct svm_node *, double * prob_estimates
;; output: double
; (define svm-predict-probability
; (foreign-lambda double "svm_predict_probability" c-pointer c-pointer double-pointer))
;; input: struct svm_model *
;; output: void
(define svm-free-model-content
(foreign-lambda void "svm_free_model_content" c-pointer))
;; input: struct svm_model *
;; output: void
(define svm-destroy-model
(foreign-lambda void "svm_destroy_model" c-pointer))
;; input: struct svm_parameter *
;; output: void
(define svm-destroy-param
(foreign-lambda void "svm_destroy_param" c-pointer))
;; input: const struct svm_problem *, const struct svm_parameter *
;; output: c-string
(define svm-check-parameter
(foreign-lambda c-string "svm_check_parameter" c-pointer c-pointer))
;; input: const struct svm_model *
;; output: int
(define svm-check-probability-model
(foreign-lambda integer "svm_check_probability_model" c-pointer))
)