;;
;; Chicken agrep library interface. Code ported from the Caml agrep
;; library by Xavier Leroy.
;;
;;
;; Copyright 2009-2015 Ivan Raikov.
;;
;; This program is free software: you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; A full copy of the GPL license can be found at
;; .
;;
;; Error handling, initialization and finalization
;; The following three functions are borrowed from the
;; Chicken-specific parts of SWIG
#>
static void chicken_Panic (C_char *) C_noret;
static void chicken_Panic (C_char *msg)
{
C_word *a = C_alloc (C_SIZEOF_STRING (strlen (msg)));
C_word scmmsg = C_string2 (&a, msg);
C_halt (scmmsg);
exit (5); /* should never get here */
}
static void chicken_ThrowException(C_word value) C_noret;
static void chicken_ThrowException(C_word value)
{
char *aborthook = C_text("\003sysabort");
C_word *a = C_alloc(C_SIZEOF_STRING(strlen(aborthook)));
C_word abort = C_intern2(&a, aborthook);
abort = C_block_item(abort, 0);
if (C_immediatep(abort))
chicken_Panic(C_text("`##sys#abort' is not defined"));
#if defined(C_BINARY_VERSION) && (C_BINARY_VERSION >= 8)
C_word rval[3] = { abort, C_SCHEME_UNDEFINED, value };
C_do_apply(3, rval);
#else
C_save(value);
C_do_apply(1, abort, C_SCHEME_UNDEFINED);
#endif
}
void chicken_agrep_exception (int code, int msglen, const char *msg)
{
C_word *a;
C_word scmmsg;
C_word list;
a = C_alloc (C_SIZEOF_STRING (msglen) + C_SIZEOF_LIST(2));
scmmsg = C_string2 (&a, (char *) msg);
list = C_list(&a, 2, C_fix(code), scmmsg);
chicken_ThrowException(list);
}
<#
; Include into generated code, but don't parse:
#>
#include
#include
#include
typedef unsigned char uchar;
typedef unsigned int uint;
#define BITS_PER_WORD (8 * sizeof(unsigned int))
#define Setbit(ptr,nbit) \
((ptr)[(nbit) / BITS_PER_WORD] |= (1UL << ((nbit) % BITS_PER_WORD)))
unsigned char word_constituent[256] = {
/* 0 - 31 */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
/* @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ */
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
/* ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \127 */
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,
/* 128-159 */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ® ¯ ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿ */
0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,1,0,
/* À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß */
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,
/* à á â ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ */
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1
};
/* Specialized versions of string matching code */
#undef WHOLE_WORD
#define FUNCTION_NAME match_0errs
#define NERRS 0
#include "skeleton.h"
#undef FUNCTION_NAME
#undef NERRS
#define FUNCTION_NAME match_1errs
#define NERRS 1
#include "skeleton.h"
#undef FUNCTION_NAME
#undef NERRS
#define FUNCTION_NAME match_2errs
#define NERRS 2
#include "skeleton.h"
#undef FUNCTION_NAME
#undef NERRS
#define FUNCTION_NAME match_3errs
#define NERRS 3
#include "skeleton.h"
#undef FUNCTION_NAME
#undef NERRS
#define WHOLE_WORD
#define FUNCTION_NAME match_word_0errs
#define NERRS 0
#include "skeleton.h"
#undef FUNCTION_NAME
#undef NERRS
#define FUNCTION_NAME match_word_1errs
#define NERRS 1
#include "skeleton.h"
#undef FUNCTION_NAME
#undef NERRS
#define FUNCTION_NAME match_word_2errs
#define NERRS 2
#include "skeleton.h"
#undef FUNCTION_NAME
#undef NERRS
#define FUNCTION_NAME match_word_3errs
#define NERRS 3
#include "skeleton.h"
#undef FUNCTION_NAME
#undef NERRS
<#
(define BITS-PER-WORD (foreign-value "BITS_PER_WORD" int))
;; Allocate bit matrix object
(define (agrep-alloc-bitmatrix patlen nentries)
(let* ((nwords (fx/ (fx+ patlen (fx- BITS-PER-WORD 1)) BITS-PER-WORD))
(size (+ nwords nentries)))
(make-u32vector size 0)))
(define agrep-set-bit
(foreign-lambda* void ((u32vector matrix)
(unsigned-int v_patlen)
(unsigned-int v_index)
(unsigned-int v_bitnum))
#<
void *stat_alloc (size_t size)
{
void *p;
if ((p = malloc (size)) == NULL)
{
chicken_agrep_exception (ENOMEM, 25, "unable to allocate memory");
}
return p;
}
void stat_free (void *p)
{
if (p == NULL)
{
chicken_agrep_exception (EINVAL, 18, "null pointer freed");
}
free (p);
}
/* General code: arbitrary errors, arbitrary pattern length */
ulong match_general(uint * table, uint m,
uint nerrs, int wholeword,
uchar * text, size_t length)
{
uint nwords, n, j;
uint ** R;
uint * Rpbefore;
uint Found_offset, Found_mask;
uint * Ssharp;
uint * Rc, * Rp;
uint carry;
uint match_empty;
ulong retcode;
nwords = (m + BITS_PER_WORD - 1) / BITS_PER_WORD;
R = stat_alloc((nerrs + 1) * sizeof(uint *));
for (n = 0; n <= nerrs; n++)
R[n] = stat_alloc(nwords * sizeof(uint));
Rpbefore = stat_alloc(nwords * sizeof(uint));
/* Initialize Found */
Found_offset = m / BITS_PER_WORD;
Found_mask = 1UL << (m % BITS_PER_WORD);
/* Initialize R */
for (n = 0; n <= nerrs; n++)
{
memset(R[n], 0, nwords * sizeof(uint));
for (j = 0; j <= n; j++) Setbit(R[n], j);
}
/* Initialize Ssharp & match_empty */
Ssharp = table + 256 * nwords;
match_empty = 1;
/* Main loop */
for (/*nothing*/; length > 0; length--, text++) {
uint * S = table + (*text) * nwords;
if (wholeword)
match_empty = word_constituent[text[0]] ^ word_constituent[text[1]];
/* Special case for 0 errors */
Rc = R[0];
carry = match_empty;
for (j = 0; j < nwords; j++) {
uint Rcbefore = Rc[j];
uint toshift = Rcbefore & S[j];
Rc[j] = (toshift << 1) | (Rcbefore & Ssharp[j]) | carry;
carry = toshift >> (BITS_PER_WORD - 1);
Rpbefore[j] = Rcbefore;
}
if (Rc[Found_offset] & Found_mask && match_empty)
{ retcode = 0; goto exit; }
/* General case for > 0 errors */
for (n = 1; n <= nerrs; n++) {
Rp = Rc;
Rc = R[n];
carry = match_empty;
for (j = 0; j < nwords; j++) {
uint Rcbefore = Rc[j];
uint toshift = (Rcbefore & S[j]) | Rpbefore[j] | Rp[j];
Rc[j] = (toshift << 1)
| Rpbefore[j]
| (Rcbefore & Ssharp[j])
| carry;
carry = toshift >> (BITS_PER_WORD - 1);
Rpbefore[j] = Rcbefore;
}
if (Rc[Found_offset] & Found_mask && match_empty)
{ retcode = n; goto exit; }
}
}
/* Not found */
retcode = C_WORD_MAX;
/* Cleanup */
exit:
for (n = 0; n <= nerrs; n++) stat_free(R[n]);
stat_free(R);
stat_free(Rpbefore);
return retcode;
}
<#
(define agrep-match
(foreign-primitive int ((c-string v_text)
(unsigned-int v_ofs)
(unsigned-int v_len)
(unsigned-int v_patlen)
(u32vector v_table)
(unsigned-int v_nerrs)
(bool v_wholeword))
#<