;; Copyright 2020 Idiomdrottning. GPL 3.0 (or later). See COPYING. (import anaphora brev-separate (chicken io) (chicken process-context) (chicken string) define-options http-client html-parser mathh sxpath uri-common) (define-options comic-snarfer `((start-page "Just a plain URL to whatever the page you want to start at." (required #t) (value #t)) (image-path "An xpath pointing to the main snarfable content of the page." (required #t) (value #t)) (next-path "An xpath pointing to the next page." (required #t) (value #t)) (start-issue "Optional. Where to start internal prefix numbering." (required #f) (value (required "number") (transformer ,string->number)) (default 0)) (for-real "The snarfer assumes a dry-run unless you supply this." (required #f) (value #f)))) (define (fix-path url) (if (absolute-uri? (uri-reference url)) url (uri->string (uri-relative-to (uri-reference url) (uri-reference start-page))))) (define digits (compose inexact->exact add1 floor log10)) (define (zero-pad num) (conc (make-string (- 4 (digits num))#\0) (->string num))) (define-closure (fig start-issue) (issue) (set! fig (add1 fig)) (zero-pad fig)) (define (save url) (with-output-to-file (conc (issue) "-" (uri-encode-string url)) (fn (write-string (with-input-from-request url #f read-string))))) (if for-real (let loop ((page start-page)) (let* ((sxml (with-input-from-request (fix-path page) #f (cut html->sxml (current-input-port)))) (image ((sxpath image-path) sxml)) (link ((sxpath next-path) sxml))) (print page) (print image) (map (compose save fix-path) (map cadr image)) (unless (null? link) (loop (cadar link))))) (let* ((sxml (with-input-from-request start-page #f (cut html->sxml (current-input-port)))) (image ((sxpath image-path) sxml)) (link ((sxpath next-path) sxml))) (map print (map cadr image)) (print (cadar link))))