(import magic-pipes) (import args) (import (chicken process-context)) (import alist-lib) (import srfi-1) (define-record-type parser-state (make-state field-buffer record-buffer) state? (field-buffer state-field-buffer) (record-buffer state-record-buffer)) (define (push-char state char) (make-state (cons char (state-field-buffer state)) (state-record-buffer state))) (define (push-field state) (make-state '() (cons (state-current-field state) (state-record-buffer state)))) (define (state-current-field state) (list->string (reverse (state-field-buffer state)))) (define (state-current-record state) (reverse (cons (state-current-field state) (state-record-buffer state)))) (define clean-state (make-state '() '())) (define (state-empty? state) (and (null? (state-field-buffer state)) (null? (state-record-buffer state)))) (define ((make-csv-parser record-sep field-sep field-quote char-quote char-quotes-work-in-field-quotes? reduce-doubled-field-quotes?) get-char! emit-record!) (letrec ((parse-field (lambda (state) (let ((char (get-char!))) (cond ((eof-object? char) ;; Record separater followed by EOF doesn't mean an extra zero-field record (unless (state-empty? state) (emit-record! (state-current-record state))) (void)) ((eq? char record-sep) (emit-record! (state-current-record state)) (parse-field clean-state)) ((eq? char field-sep) (parse-field (push-field state))) ((eq? char field-quote) (parse-quoted-field state)) ((eq? char char-quote) (parse-quoted-char state)) (else (parse-unqoted-field (push-char state char))))))) (parse-quoted-char (lambda (state) (let ((char (get-char!))) (cond ((eof-object? char) (emit-record! (state-current-record (push-char state char-quote))) (void)) (else (parse-unqoted-field (push-char state char))))))) (parse-unqoted-field (lambda (state) (let ((char (get-char!))) (cond ((eof-object? char) (emit-record! (state-current-record state)) (void)) ((eq? char record-sep) (emit-record! (state-current-record state)) (parse-field clean-state)) ((eq? char field-sep) (parse-field (push-field state))) ((eq? char char-quote) (parse-quoted-char state)) (else (parse-unqoted-field (push-char state char))))))) (parse-quoted-field (lambda (state) (let ((char (get-char!))) (cond ((eof-object? char) (emit-record! (state-current-record state)) (void)) ((eq? char field-quote) (parse-possible-end-quote state)) ((and char-quotes-work-in-field-quotes? (eq? char char-quote)) (parse-double-quoted-char state)) (else (parse-quoted-field (push-char state char))))))) (parse-double-quoted-char (lambda (state) (let ((char (get-char!))) (cond ((eof-object? char) (emit-record! (state-current-record (push-char state char-quote))) (void)) (else (parse-quoted-field (push-char state char))))))) (parse-possible-end-quote (lambda (state) (let ((char (get-char!))) (cond ((eof-object? char) (emit-record! (state-current-record state)) (void)) ((eq? char record-sep) (emit-record! (state-current-record state)) (parse-field clean-state)) ((eq? char field-sep) (parse-field (push-field state))) ((eq? char field-quote) ;; Doubled field quote character (if reduce-doubled-field-quotes? (parse-quoted-field (push-char state char)) (parse-quoted-field (push-char (push-char state field-quote) char)))) (else (parse-quoted-field (push-char (push-char state field-quote) char)))))))) (parse-field clean-state))) (define (parse-char-field-arg options name default usage) (let ((value-str (alist-ref options name (lambda () default)))) (if (= (string-length value-str) 1) (string-ref value-str 0) (usage)))) (receive (options operands before-exprs after-exprs usage) (parse-mp-args (command-line-arguments) (list (args:make-option (R record) (required: "CHARACTER") "Specify the record separater character") (args:make-option (D delimiter) (required: "CHARACTER") "Specify the field delimiter character") (args:make-option (T tsv) #:none "Set the field delimiter character to a tab") (args:make-option (no-fields) #:none "Disabled field delimiters, every record has a single field") (args:make-option (Q field-quote) (required: "CHARACTER") "Specify the field quote character") (args:make-option (q no-field-quote) #:none "Disable field quoting") (args:make-option (C char-quote) (required: "CHARACTER") "Specify the single character quote character") (args:make-option (c no-char-quote) #:none "Disable character quoting") (args:make-option (no-embedded-quotes) #:none "Disable character quotes inside field quotes") (args:make-option (no-embedded-double-quotes) #:none "Don't treat doubled field-quotes inside field quotes as a single field quote character")) "" "Read CSV from standard input, and convert each record to a list sexpr on standard output.") (unless (= (length operands) 0) (usage)) (let* ((ec (make-eval-context before-exprs '() after-exprs)) (record-sep (parse-char-field-arg options 'record "\n" usage)) (field-sep (cond ((assq 'tsv options) #\tab) ((assq 'no-fields options) #f) (else (parse-char-field-arg options 'delimiter "," usage)))) (field-quote (if (assq 'no-field-quote options) #f (parse-char-field-arg options 'field-quote "\"" usage))) (char-quote (if (assq 'no-char-quote options) #f (parse-char-field-arg options 'char-quote "\\" usage))) (char-quotes-work-in-field-quotes? (not (assq 'no-embedded-quotes options))) (reduce-doubled-field-quotes? (not (assq 'no-embedded-double-quotes options))) (parse! (make-csv-parser record-sep field-sep field-quote char-quote char-quotes-work-in-field-quotes? reduce-doubled-field-quotes?))) (parse! read-char (lambda (record) (data-write record))) (without-input-port (eval-context-end-closure ec))))