;; ;; Access the NeuroMorpho database and download morphology files. ;; ;; Copyright 2009 Ivan Raikov and the Okinawa Institute of Science and ;; Technology. ;; ;; This program is free software: you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation, either version 3 of the ;; License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; A full copy of the GPL license can be found at ;; . ;; (require-extension extras regex posix utils extras files data-structures tcp srfi-1 srfi-13 matchable html-parser sxml-transforms sxpath uri-generic getopt-long) (define lookup-def (lambda (k lst . rest) (let-optionals rest ((default #f)) (alist-ref k lst eq? default)))) (define (quotewrapped? str) (and (string? str) (string-prefix? "\"" str) (string-suffix? "\"" str) )) (define (quotewrap str) (cond ((quotewrapped? str) str) ((string-any char-whitespace? str) (string-append "\"" str "\"")) (else str))) (define (create-directory dir . rest) (system* "mkdir -p ~a" (quotewrap dir))) (define-constant +default-tcp-connect-timeout+ 10000) ; 10 seconds (define-constant +default-tcp-read/write-timeout+ 20000) ; 20 seconds (tcp-connect-timeout +default-tcp-connect-timeout+) (tcp-read-timeout +default-tcp-read/write-timeout+) (tcp-write-timeout +default-tcp-read/write-timeout+) (define *quiet* #f) (define *user-agent* "chicken-neuromorpho") (define (d fstr . args) (let ([port (if *quiet* (current-error-port) (current-output-port))]) (apply fprintf port fstr args) (flush-output port) ) ) (define opt-defaults `( (morphology-file . S) (meta-filter . (("Note" ""))) (index-fields . ("Neuron Name" "Note")) )) (define (defopt x) (lookup-def x opt-defaults)) (define (symbol-upcase str) (string->symbol (string-upcase str))) (define opt-grammar `( (data-dir "set download directory (default is a randomly generated name in /tmp)" (single-char #\d) (value (required DIR))) (morphology-file "download morphology files (Original, Standard or None, default is standard)" (single-char #\m) (value (required "O, S, or N") (default ,(defopt 'morphology-file)) (predicate ,(lambda (arg) (case (symbol-upcase arg) ((O S N ORIGINAL STANDARD) #t) (else #f)))) (transformer ,symbol-upcase))) (meta-filter "filter pages based on metadata" (single-char #\f) (value (required "NAME1=REGEXP1[,NAME2!=REGEXP2 ...]") (default ,(defopt 'meta-filter)) (transformer ,(lambda (arg) (map (lambda (x) (match (string-split x "!=") ((n v) `(!= . ,(map string-trim-both (list n v)))) (else (match (string-split x "=") ((n v ) `(= . ,(map string-trim-both (list n v)))) (else `(= ,(string-trim-both x) "")))))) (string-split arg ","))) ))) (print-metadata "print metadata" (single-char #\p)) (i "make index file") (index-fields ,(begin (print "index-fields = " (defopt 'index-fields)) (string-append "comma-separated list of index fields " "(default is " (string-intersperse (defopt 'index-fields) ", ") ")") ) (value (required "FIELD1,...") (default ,(defopt 'index-fields)) (transformer ,(lambda (arg) (map string-trim-both (string-split (or arg "") ",")))))) (help "Print help" (single-char #\h)) )) ;; Use args:usage to generate a formatted list of options (from OPTS), ;; suitable for embedding into help text. (define (neuromorpho:usage) (print "Usage: " (car (argv)) " [options...] operands ") (newline) (print "Where operands are HTML files that contain search results from NeuroMorpho: ") (print "e.g. " (car (argv)) " neuromorpho_searchresults.html") (newline) (print "The following options are recognized: ") (newline) (width 35) (print (parameterize ((indent 5)) (usage opt-grammar))) (exit 1)) ;; Process arguments and collate options and arguments into OPTIONS ;; alist, and operands (filenames) into OPERANDS. You can handle ;; options as they are processed, or afterwards. (define opts (getopt-long (command-line-arguments) opt-grammar)) (define opt (make-option-dispatch opts opt-grammar)) (define (get-data-dir) (or (opt 'data-dir) (let ([dir (create-temporary-directory)]) (data-dir dir) dir ) ) ) (define (create-temporary-directory) (let ((dir (or (getenv "TMPDIR") (getenv "TEMP") (getenv "TMP") "/tmp"))) (let loop () (let* ((n (##sys#fudge 16)) ; current milliseconds (pn (make-pathname dir (string-append "neuromorpho-" (number->string n 16)) "tmp"))) (cond ((file-exists? pn) (loop)) (else (create-directory pn) pn)))))) (define (network-failure msg . args) (signal (make-composite-condition (make-property-condition 'exn 'message "invalid response from server" 'arguments args) (make-property-condition 'http-fetch))) ) (define (make-HTTP-GET/1.1 location user-agent host #!key (port 80) (connection "close") (accept "*") (content-length 0)) (conc "GET " location " HTTP/1.1" "\r\n" "Connection: " connection "\r\n" "User-Agent: " user-agent "\r\n" "Accept: " accept "\r\n" "Host: " host #\: port "\r\n" "Content-length: " content-length "\r\n" "\r\n") ) (define (match-http-response rsp) (and (string? rsp) (string-match "HTTP/[0-9.]+\\s+([0-9]+)\\s+.*" rsp)) ) (define (response-match-code? mrsp code) (and mrsp (string=? (number->string code) (cadr mrsp))) ) (define (match-chunked-transfer-encoding ln) (string-match "[Tt]ransfer-[Ee]ncoding:\\s*chunked.*" ln) ) (define (http-fetch uri dest) (d "fetching ~s ...~%" (uri->string uri)) (match-let (((_ ((_ host port) ('/ . path) query) _) (uri->list uri))) (let* ((port (or port 80)) (locn (uri->string (update-uri (update-uri uri scheme: #f) host: #f))) (query (and query (not (string-null? query)) query)) (filedir (uri-decode-string (string-concatenate (intersperse (if query path (drop-right path 1)) "/")))) (filename (uri-decode-string (or (and query (cadr (string-split query "="))) (last path)))) (dest (make-pathname dest filedir)) (filepath (make-pathname dest filename))) (if (file-exists? filepath) filepath (begin (d "connecting to host ~s, port ~a ...~%" host port) (let-values ([(in out) (tcp-connect host port)]) (d "requesting ~s ...~%" locn) (display (make-HTTP-GET/1.1 locn *user-agent* host port: port accept: "*/*") out) (flush-output out) (d "reading response ...~%") (let ([chunked #f] [ok-response #f]) (let* ([h1 (read-line in)] [response-match (match-http-response h1)]) (d "~a~%" h1) ;;*** handle redirects here (cond ((response-match-code? response-match 200) (set! ok-response #t)) ((response-match-code? response-match 404) (d "file not found on server: ~s~%" locn)) (else (network-failure "invalid response from server" h1) )) (and ok-response (begin (let loop () (let ([ln (read-line in)]) (unless (string-null? ln) (when (match-chunked-transfer-encoding ln) (set! chunked #t)) (d "~a~%" ln) (loop) ) ) ) (if chunked (begin (d "reading chunks ...~%") (let ([data (read-chunks in)]) (close-input-port in) (close-input-port out) (if (not (file-exists? dest)) (create-directory dest #t)) (d "writing to ~s~%" filepath) (with-output-to-file filepath (cut display data) ) filepath)) (begin (d "reading data ...~%") (let ([data (read-string #f in)]) (close-input-port in) (close-input-port out) (if (not (file-exists? dest)) (create-directory dest #t)) (d "writing to ~s~%" filepath) (with-output-to-file filepath (cut display data) binary:) filepath))))) ) ))))))) (define (read-chunks in) (let get-chunks ([data '()]) (let ([size (string->number (read-line in) 16)]) (if (zero? size) (string-concatenate-reverse data) (let ([chunk (read-string size in)]) (read-line in) (get-chunks (cons chunk data)) ) ) ) ) ) (define (div-class class-name) `(div (@ class *text* ,(lambda (x ns) (or (and (pair? x) (string=? (car x) class-name)) '()))))) (define (parse-sxml fpath) (with-input-from-file fpath (lambda () (cons '*TOP* (html->sxml (current-input-port)))))) (define (anchor->url tree) (post-order tree `((a ((@ ((*default* . ,(lambda args args))) . ,(lambda args args))) . ,(lambda (tag maybe-attrs . elems) (let* ((attrs (and (pair? maybe-attrs) (eq? '@ (car maybe-attrs)) (cdr maybe-attrs))) (href (assoc 'href attrs))) (and href (pair? elems) `(url ,(cadr href) ,(car elems)))))) (*text* . ,(lambda (trigger str) str))))) (define (extract-info-links sxml) (let* ((info-links ((sxpath `(// html body // ,(div-class "info") table // tr // (a (@ href)) )) sxml))) (anchor->url info-links))) (define (table->alist tree) (and (pair? tree) (filter (lambda (x) (not (null? x))) (post-order (car tree) `((tr ((td . ,(lambda (tag . elems) (let ((elems (filter (lambda (x) (not (null? x))) elems))) (or (and (pair? elems) (car elems)) '()))))) . ,(lambda (tag . elems) (let ((elems (filter (lambda (x) (and (not (and (string? x) (string-null? x))) (not (null? x)))) elems))) (cond ((and (pair? elems) (pair? (cdr elems))) (let ((key (car (string-split (car elems) ":"))) (value (cadr elems))) `(,key . ,value))) ((pair? elems) (let ((key (car (string-split (car elems) ":")))) (list key))) (else '()))))) (table . ,(lambda (tag . elems) elems)) (*text* . ,(lambda (trigger str) (string-trim-both str))) (*default* . ,(lambda args '()))))))) (define (extract-metadata sxml) (let* ((meta-data ((sxpath `(// html body // ,(div-class "info") // center table)) sxml)) (detail (table->alist ((sxpath `(// table // table)) (car meta-data)))) (articles (cadr meta-data)) (measurements (table->alist ((sxpath `(// table // table)) (caddr meta-data))))) `((detail . ,detail) (measurements . ,measurements)))) (define (extract-pages-from-search-results sxml) (let* ((pages ((sxpath `(// html body // center table tbody tr td input // a )) sxml))) (anchor->url pages))) (define (fetch-file url) (let ((ddir (get-data-dir)) (uri (uri-reference (cadr url)))) (if (not ddir) (error "cannot create download directory" ddir)) (if (not (uri? uri)) (error "URL to file must not be a relative reference" uri)) (let ((filepath (http-fetch uri ddir))) (if filepath (d "fetched ~s~%" filepath) (d "unable to fetch ~s~%" (uri->string uri))) filepath))) (define (make-index-page info morphology-files) (let ((title (sprintf "NeuroMorpho search results"))) (sxml->html `((literal "\n") (literal "") (html ,(header title) (body ,(titlebar title) ,(content (prelude title) `(table (tr ,(map (lambda (x) `(td ,x)) (index-fields) ) (td "Details") (td "Morphology file")) ,(map record-info info morphology-files)))) ))))) (define (record-info info morphology-file) (let ((detail (alist-ref 'detail (cdr info)))) (let ((index-vals (map (lambda (x) (alist-ref x detail string-ci=?)) (index-fields)))) `(tr ,@(map (lambda (v) `(td ,v)) index-vals) (td (a (@ (href ,(make-absolute-pathname (current-directory) (car info)))) "Details")) ,(or (and morphology-file `(td (a (@ (href ,(string-append "file://" (make-absolute-pathname (current-directory) morphology-file)))) "Morphology file"))) `(td "Morphology file not found")))))) (define (header title) `(head ; (link (@ (rel "stylesheet") ; (type "text/css") ; (href "http://chicken.wiki.br/common-css"))) (title ,title))) (define (titlebar title) `(div (@ (id "header")) (h1 (a (@ (href "http://neuromorpho.org/")) ,title)))) (define (prelude title) `()) (define (content . body) `(div (@ (id "content-box")) (div (@ (class "content")) ,body))) (define (sxml->html doc) (SRV:send-reply (pre-post-order doc ;; LITERAL tag contents are used as raw HTML. `((literal *preorder* . ,(lambda (tag . body) (map ->string body))) ,@universal-conversion-rules)))) (define (main) (let ((operands (opt '@)) (meta-filter (opt 'meta-filter)) (morphology-file (opt 'morphology-file))) (if (null? operands) (neuromorpho:usage)) (d "download directory is ~s~%" (get-data-dir)) (if meta-filter (d "metadata filter is ~s~%" meta-filter)) (let* ((data-list (concatenate (map (lambda (p) (let* ((search-results (parse-sxml p)) (page-list (extract-pages-from-search-results search-results)) (file-list (map fetch-file page-list)) (sxml-list (map parse-sxml file-list)) (meta-list (map extract-metadata sxml-list)) (links-list (map (lambda (sxml) `(links . ,(extract-info-links sxml))) sxml-list))) (map (lambda (f m l) `(,f ,@m ,l)) file-list meta-list links-list))) operands))) (meta-filtered-list (begin (if (and meta-filter (pair? meta-filter)) (let ((ops (map first meta-filter)) (ks (map second meta-filter)) (rxs (map (lambda (x) (regexp (third x) #t)) meta-filter))) (filter (lambda (x) (let ((detail (alist-ref 'detail (cdr x)))) (every (lambda (op k rx) (let* ((propval (alist-ref k detail string-ci=?)) (propval (or (and (string? propval) propval) "")) (m (string-match rx propval))) (case op ((=) m) ((!=) (not m)) (else m)))) ops ks rxs))) data-list)) data-list))) ) (if (opt 'print-metadata) (for-each print meta-filtered-list)) (let ((morphology-files (case morphology-file ((O S ORIGINAL STANDARD) (let* ((rx (regexp (regexp-escape (case morphology-file ((O ORIGINAL) "Morphology File (Original)") ((S STANDARD) "Morphology File (Standardized)") (else ""))))) (download-url? (lambda (x) (string-match rx (caddr x))))) (map (lambda (x) (let* ((links (alist-ref 'links (cdr x))) (morphology-url (find download-url? links))) (fetch-file morphology-url))) meta-filtered-list))) (else (list))))) (if (and (opt 'i) (not (null? morphology-files))) (let ((index-file (make-pathname (make-absolute-pathname (current-directory) (get-data-dir)) "index.html"))) (d "creating index file ~s...~%" index-file) (with-output-to-file index-file (lambda () (make-index-page meta-filtered-list morphology-files)))))))) ) (main)