;;; rss.scm (module rss (rss:item? rss:item-title rss:item-link rss:item-description rss:item-attributes rss:item-attribute rss:feed? rss:feed-version rss:feed-channel rss:feed-items rss:item-enclosure rss:enclosure-type rss:enclosure-length rss:enclosure? rss:enclosure-url rss:item=? rss:read) (import scheme chicken extras) (require-extension srfi-1 matchable ssax) (define namespace-prefixes '((rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#") (dc . "http://purl.org/dc/elements/1.1/") (sy . "http://purl.org/rss/1.0/modules/syndication/") (admin . "http://webns.net/mvcb/") (rss . "http://purl.org/rss/1.0/") (rdf . "http://my.netscape.com/rdf/simple/0.9/") (itunes . "http://www.itunes.com/dtds/podcast-1.0.dtd") (content . "http://purl.org/rss/1.0/modules/content/") (atom . "http://www.w3.org/2005/Atom") ) ) (define-record-type rss:item (make-rss:item title link description enclosure attributes) rss:item? (title rss:item-title rss:item-title-set!) (link rss:item-link rss:item-link-set!) (description rss:item-description rss:item-description-set!) (enclosure rss:item-enclosure rss:item-enclosure-set!) (attributes rss:item-attributes rss:item-attributes-set!) ) (define-record-type rss:feed (make-rss:feed version channel items) rss:feed? (version rss:feed-version rss:feed-version-set!) (channel rss:feed-channel rss:feed-channel-set!) (items rss:feed-items rss:feed-items-set!) ) (define-record-type rss:enclosure (make-rss:enclosure type url length) rss:enclosure? (type rss:enclosure-type rss:enclosure-type-set!) (url rss:enclosure-url rss:enclosure-url-set!) (length rss:enclosure-length rss:enclosure-length-set!) ) (define (sxml->enclosure sxml) (let ((enc (make-rss:enclosure #f #f #f))) (letrec ((parse (match-lambda ['() enc] [('@ rest ...) (parse rest)] [(('url url) rest ...) (rss:enclosure-url-set! enc url) (parse rest)] [(('length len) rest ...) (rss:enclosure-length-set! enc len) (parse rest)] [(('type type) rest ...) (rss:enclosure-type-set! enc type)(parse rest)] [ x (warn "uncatched pattern ~S" x)] ))) (parse sxml)))) (define (rss:item-attribute item attr) (let ([attrs (rss:item-attributes item)]) (and-let* ([a (assq attr attrs)]) (cdr a) ) ) ) (define (rss:item=? i1 i2) (or (and-let* ([g1 (rss:item-attribute i1 'guid)] [g2 (rss:item-attribute i2 'guid)] ) (string=? g1 g2) ) (equal? i1 i2) ) ) (define feed #f) (define (rss:read . port) (let ([sxml (ssax:xml->sxml (if (pair? port) (car port) (current-input-port)) namespace-prefixes)]) ;;(pp sxml (current-error-port)) (fluid-let ([feed (make-rss:feed #f #f '())]) (traverse sxml) (rss:feed-items-set! feed (reverse (rss:feed-items feed))) feed) ) ) (define cleanup (match-lambda [('*TOP* items ...) (let loop ([items items]) (match (car items) [('@ . _) (loop (cdr items))] [('@@ . _) (loop (cdr items))] [('*PI* . _) (loop (cdr items))] [('*NAMESPACES* . _) (loop (cdr items))] [x x] ) ) ] [x x] ) ) (define (rss:error msg . args) (signal (make-composite-condition (make-property-condition 'rss) (make-property-condition 'exn 'message msg 'arguments args) ) ) ) (define (traverse sxml) (match (cleanup sxml) [('rss ('@ attrs ...) data ...) (check-version attrs) (for-each (cut traverse-element <> #f) data) ] [('rdf:RDF data ...) (rss:feed-version-set! feed "1.0") (for-each (cut traverse-element <> #t) data) ] [_ (rss:error "invalid root element" sxml)] ) ) (define (check-version attrs) (for-each (match-lambda [('version v) (rss:feed-version-set! feed v) ] ) attrs) ) (define last-channel #f) (define channel #f) (define item #f) (define (warn fstr . args) (printf "Warning: ~?~%" fstr args) ) (define (traverse-element sxml rdf) (let rec ([sxml sxml]) (match sxml [((or 'channel 'rss:channel 'rdf:channel) elts ...) (fluid-let ([channel (make-rss:item #f #f #f #f '())]) (set! last-channel channel) (rss:feed-channel-set! feed channel) (for-each rec elts) ) ] [((or 'item 'rss:item 'rdf:item) elts ...) (fluid-let ([item (make-rss:item #f #f #f #f '())]) (rss:feed-items-set! feed (cons item (rss:feed-items feed))) (for-each rec elts) ) ] [((or 'title 'rss:title 'rdf:title) title) (let ([d (or item channel last-channel)]) (if d (begin (rss:item-title-set! d title) (rss:item-attributes-set! d (alist-cons 'title title (rss:item-attributes d))) ) (warn "tag `title' with content ~S in wrong context" title) ) ) ] [((or 'link 'rss:link 'rdf:link) link) (let ([d (or item channel last-channel)]) (if d (begin (rss:item-link-set! d link) (rss:item-attributes-set! d (alist-cons 'link link (rss:item-attributes d))) ) (warn "tag `link' with content ~S in wrong context" link) ) ) ] [((or 'enclosure 'rss:enclosure 'rdf:enclosure) enclosure) (let ([d (or item channel last-channel)]) (if d (begin (rss:item-enclosure-set! d (sxml->enclosure enclosure)) (rss:item-attributes-set! d (alist-cons 'enclosure enclosure (rss:item-attributes d))) ) (warn "tag `enclosure' with content ~S in wrong context" enclosure) ) ) ] [((or 'description 'rss:description 'rdf:description) description) (let ([d (or item channel last-channel)]) (if d (begin (rss:item-description-set! d description) (rss:item-attributes-set! d (alist-cons 'description description (rss:item-attributes d))) ) (warn "tag `description' with content ~S in wrong context" description) ) ) ] [(tag ('@ . _) . more) (rec (cons tag more)) ] [(tag . more) (let ([d (or item channel last-channel)]) (if d (rss:item-attributes-set! d (alist-cons tag (cond [(null? more) #t] [(null? (cdr more)) (car more)] [else more] ) (rss:item-attributes d) ) ) (warn "tag `~A' with content ~S in wrong context" tag more) ) (for-each rec more) ) ] [_ #f] ) ) ) )