;;; Simple parser for string-notated bytevectors. (define-library (srfi 207 read) (import (scheme base) (scheme case-lambda) (scheme char) (only (chicken base) void) (chicken condition) (chicken type) ) (export read-textual-bytestring) (begin (include "exceptions.scm") (define-type bytevector u8vector) (: read-error (string #!rest * -> noreturn)) (define (read-error msg . args) (apply bytestring-error 'read-textual-bytestring msg args)) (: parse (boolean -> undefined)) (define (parse prefix) (when prefix (consume-prefix)) (consume-quote) (let lp ((c (read-char))) (cond ((eof-object? c) (read-error "unexpected EOF")) ((char=? c #\") (void)) ; terminating quote ((char=? c #\\) (let ((c* (read-char))) (cond ((eof-object? c*) (read-error "incomplete escape sequence")) ((escape c*) => (lambda (b) (write-u8 b) (lp (read-char)))) (else (lp (read-char)))))) ((and (char>=? c #\space) (char<=? c #\~)) (write-u8 (char->integer c)) (lp (read-char))) (else (read-error "invalid character" c))))) (: consume-quote (-> undefined)) (define (consume-quote) (let ((c (read-char))) (cond ((eof-object? c) (read-error "unexpected EOF")) ((char=? c #\") (void)) (else (read-error "invalid character (expected #\\\")" c))))) (: consume-prefix (-> undefined)) (define (consume-prefix) (let ((s (read-string 3))) (cond ((eof-object? s) (read-error "unexpected EOF")) ((string=? s "#u8") (void)) (else (read-error "invalid bytestring prefix" s))))) (: escape (char -> fixnum)) (define (escape c) (case c ((#\a) 7) ((#\b) 8) ((#\t) 9) ((#\n) 10) ((#\r) 13) ((#\") 34) ((#\\) 92) ((#\|) 124) ((#\x) (parse-hex)) ((#\newline) (skip-horizontal-whitespace) #f) ; skip (else (cond ((char-whitespace? c) (skip-horizontal-whitespace) (skip-line-break) #f) (else (read-error "invalid escaped character" c)))))) (: parse-hex (-> fixnum)) (define (parse-hex) (let* ((hex1 (read-char)) (hex2 (read-char))) (when (or (eof-object? hex1) (eof-object? hex2)) (read-error "incomplete hexadecimal sequence")) (if (char=? hex2 #\;) (or (string->number (string hex1) 16) (read-error "invalid hexadecimal sequence")) (let ((term (read-char))) (if (eqv? term #\;) (or (string->number (string hex1 hex2) 16) (read-error "invalid hexadecimal sequence")) (read-error "overlong or unterminated hexadecimal sequence")))))) (: skip-line-break (-> undefined)) (define (skip-line-break) (let ((c (read-char))) (unless (eqv? #\newline c) (read-error "expected newline" c))) (skip-horizontal-whitespace)) (: skip-horizontal-whitespace (-> undefined)) (define (skip-horizontal-whitespace) (let lp ((c (peek-char))) (when (and (char-whitespace? c) (not (char=? c #\newline))) (read-char) (lp (peek-char))))) (: read-textual-bytestring (boolean #!optional input-port -> bytevector)) (define read-textual-bytestring (case-lambda ((prefix) (read-textual-bytestring prefix (current-input-port))) ((prefix in) (assert-type 'read-textual-bytestring (boolean? prefix)) (assert-type 'read-textual-bytestring (input-port? in)) (call-with-port (open-output-bytevector) (lambda (out) (parameterize ((current-input-port in) (current-output-port out)) (parse prefix) (get-output-bytevector out))))))) ))