;; -*- Hen -*-
;;
;; A lexical analyzer for I-expressions
;;
;;
;; Copyright 2010-2011 Ivan Raikov and the Okinawa Institute of Science
;; and Technology.
;;
;; This program is free software: you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; A full copy of the GPL license can be found at
;; <http://www.gnu.org/licenses/>.
;;
;; Macro definitions.  Character classes are given as decimal character
;; codes (\32 is the space, \9 the tab, \10 and \13 the line-break codes).
;;
;; char_qword: one character in 33-38, 40-55295, 57344-65533 or
;; 65536-1114109.  Within 33-55295 the only code left out is 39 (the
;; single quote).  Not referenced by the rules below.
char_qword ([\33-\38]|[\40-\55295]|[\57344-\65533]|[\65536-\1114109])
;; A single space / a single tab.
char_space (\32)
char_tab (\9)
;; One or more consecutive CR (13) or LF (10) characters.
break (\13|\10)+
;; word: a first character followed by zero or more further characters.
;;  - first character: 33, 35-38, 42, 43, 45-47, 58-55295, 57344-65533 or
;;    65536-1114109.  That leaves out 34 ("), 39 ('), 40-41 (parentheses),
;;    44 (comma) and 48-57 (the decimal digits).
;;  - later characters: 33-38, 42, 43, 45-55295, 57344-65533 or
;;    65536-1114109.  That leaves out 39 ('), 40-41 and 44, but allows
;;    34 (") and the digits.
word (([\33\35-\38]|[\42\43\45-\47\58-\55295]|[\57344-\65533]|[\65536-\1114109])([\33-\38]|[\42\43\45-\55295]|[\57344-\65533]|[\65536-\1114109])*)
;; Single-digit classes used by the numeric literal rules.
hex [0-9A-Fa-f]
octal [0-7]
binary [0-1]
decimal [0-9]
%%
;; Lexer rules
;;
;; The first four rules build no token: each action is just (yycontinue).
;;
;; "#" followed by any mix of words and runs of spaces.
"#"({word}|({char_space}+))* (yycontinue)
;; A ";" comment that starts a line (optionally preceded by spaces/tabs):
;; everything up to the next CR/LF, together with the line break(s) that
;; follow it.
;; NOTE(review): "+*" stacks two repetition operators; if the generator
;; applies them in sequence this is the same as a single "*" -- confirm
;; before simplifying.
^({char_space}|{char_tab})+*";"[^\10\13]*{break} (yycontinue)
;; A ";" comment in any other position: everything up to, but not
;; including, the next CR/LF.
({char_space}|{char_tab})+*";"[^\10\13]* (yycontinue)
;; A line made only of spaces/tabs, together with its line break(s).
^({char_space}|{char_tab})+{break} (yycontinue)
;; Quoted strings (single- and double-quoted).
;;
;; Each pattern matches only the opening quote.  The action then reads
;; characters one at a time with (yygetc), accumulating them in reverse,
;; until the matching closing quote, and returns a WORD token whose value
;; is the collected string.  A backslash makes the next character literal,
;; so the quote character and the backslash itself can appear inside the
;; string.
;;
;; (yygetc) yields the symbol eof at end of input.  That is reported via
;; lexer-error both inside the string body and directly after a backslash;
;; the eof marker is never stored in the character accumulator.
\'                      (let scan ((acc '()))
                          (let ((ch (yygetc)))
                            (cond ((eq? 'eof ch)
                                   (lexer-error "unexpected end of string constant"))
                                  ((char=? ch #\\)
                                   (let ((escaped (yygetc)))
                                     (if (eq? 'eof escaped)
                                         (lexer-error "unexpected end of string constant")
                                         (scan (cons escaped acc)))))
                                  ((char=? ch #\')
                                   (tok yyline WORD (reverse-list->string acc)))
                                  (else
                                   (scan (cons ch acc))))))
\"                      (let scan ((acc '()))
                          (let ((ch (yygetc)))
                            (cond ((eq? 'eof ch)
                                   (lexer-error "unexpected end of string constant"))
                                  ((char=? ch #\\)
                                   (let ((escaped (yygetc)))
                                     (if (eq? 'eof escaped)
                                         (lexer-error "unexpected end of string constant")
                                         (scan (cons escaped acc)))))
                                  ((char=? ch #\")
                                   (tok yyline WORD (reverse-list->string acc)))
                                  (else
                                   (scan (cons ch acc))))))
;; One or more consecutive CR/LF characters -> BREAK token.
{break} (tok yyline BREAK)
;; Line break(s), then "--" alone on its line, then one or more further
;; line-break runs -> END token.
{break}"--"{break}+ (tok yyline END)
;; A run of spaces and/or tabs -> SPACE token carrying a width.
;; Every space in the matched text adds 1 to the width; every other
;; matched character (a tab, given the pattern) adds 7.
;; NOTE(review): 7 rather than 8 per tab -- confirm this is intended.
({char_space}|{char_tab})+    (let ((n (string-length yytext)))
                                (do ((k 0 (+ k 1))
                                     (width 0 (+ width
                                                 (if (eq? (string-ref yytext k) #\space)
                                                     1
                                                     7))))
                                    ((= k n) (tok yyline SPACE width))))
;; Radix-prefixed naturals: "0x"/"0o"/"0b"/"0d" (either letter case)
;; followed by one or more digits of that base.  The two-character prefix
;; is dropped and the remaining digits are converted in the given base;
;; the result is returned as a NAT token.
0(x|X)({hex})+          (let* ((end (string-length yytext))
                               (digits (substring yytext 2 end)))
                          (tok yyline NAT (string->number digits 16)))
0(o|O)({octal})+        (let* ((end (string-length yytext))
                               (digits (substring yytext 2 end)))
                          (tok yyline NAT (string->number digits 8)))
0(b|B)({binary})+       (let* ((end (string-length yytext))
                               (digits (substring yytext 2 end)))
                          (tok yyline NAT (string->number digits 2)))
0(d|D)({decimal})+      (let* ((end (string-length yytext))
                               (digits (substring yytext 2 end)))
                          (tok yyline NAT (string->number digits 10)))
;; Optionally "-"-prefixed run of decimal digits -> NAT, read in base 10.
-?({decimal})+ (tok yyline NAT (string->number yytext 10))
;; Optionally "-"-prefixed decimal number: digits with an optional
;; fraction, or a bare fraction such as ".5", followed by an optional
;; exponent ("e"/"E", optional sign, digits) -> REAL.
;; NOTE(review): this pattern also accepts plain integers; presumably the
;; NAT rule above takes them because it is listed first -- confirm against
;; the lexer generator's tie-breaking.
-?(({decimal}+(\.{decimal}+)?)|(\.{decimal}+))([eE]([-+])?{decimal}+)? (tok yyline REAL (string->number yytext))
;; Digits, a mandatory fraction, then an optional trailing word -> WORD
;; token carrying the whole text as a symbol.
;; NOTE(review): without the trailing word the same text is matched by the
;; REAL rule above -- presumably that rule wins; confirm.
{decimal}+(\.{decimal}+)({word}?) (tok yyline WORD (string->symbol yytext))
;; Any other word -> WORD token carrying the matched text as a symbol.
{word}                  (tok yyline WORD (string->symbol yytext))
;; Punctuation tokens.
"("                     (tok yyline LPAREN)
")"                     (tok yyline RPAREN)
","                     (tok yyline COMMA)
;; End of input: the lexer returns the symbol *eoi*.
<<EOF>>                 '*eoi*
;; No rule matches at the current position: report the offending
;; character (fetched with yygetc) together with the current line number.
<<ERROR>>               (lexer-error (conc yyline ": illegal character") (yygetc))