(use posix) (use srfi-1) (use numbers) (use ugarit-mime) (use srfi-37) (use miscmacros) (use fnmatch) (define *excludes* '()) (define *defines* '()) (define *total-objects* 0) (define *total-bytes* 0) (define (path-size path) (if (directory? path) (fold + 0 (map (lambda (relpath) (path-size (make-pathname path relpath))) (directory path))) ;; Not a directory (file-size path))) (define (included-file? path) (let ((name (pathname-strip-directory path))) (not (any (lambda (pattern) (fnmatch pattern name)) *excludes*)))) (define (get-jpeg-metadata path) ;; FIXME http://www.cipa.jp/std/documents/e/DC-008-2012_E.pdf '((dc:description . #f) (dc:spatial . #f) (dc:temporal . #f) (dc:creator . #f) (dc:created . #f) (dc:subject . #f))) (define debug (lambda x (void))) #;(define debug printf) (define (alist-update! alist key value) (let ((pair (assq key alist))) (if (and (pair? pair) (not (cdr pair))) (begin (set-cdr! pair value) ;; Exists but is #f, so overwrite alist) (cons (cons key value) alist)))) (define genres '((0 . "Blues") (1 . "Classic Rock") (2 . "Country") (3 . "Dance") (4 . "Disco") (5 . "Funk") (6 . "Grunge") (7 . "Hip-Hop") (8 . "Jazz") (9 . "Metal") (10 . "New Age") (11 . "Oldies") (12 . "Other") (13 . "Pop") (14 . "R&B") (15 . "Rap") (16 . "Reggae") (17 . "Rock") (18 . "Techno") (19 . "Industrial") (20 . "Alternative") (21 . "Ska") (22 . "Death Metal") (23 . "Pranks") (24 . "Soundtrack") (25 . "Euro-Techno") (26 . "Ambient") (27 . "Trip-Hop") (28 . "Vocal") (29 . "Jazz+Funk") (30 . "Fusion") (31 . "Trance") (32 . "Classical") (33 . "Instrumental") (34 . "Acid") (35 . "House") (36 . "Game") (37 . "Sound Clip") (38 . "Gospel") (39 . "Noise") (40 . "AlternRock") (41 . "Bass") (42 . "Soul") (43 . "Punk") (44 . "Space") (45 . "Meditative") (46 . "Instrumental Pop") (47 . "Instrumental Rock") (48 . "Ethnic") (49 . "Gothic") (50 . "Darkwave") (51 . "Techno-Industrial") (52 . "Electronic") (53 . "Pop-Folk") (54 . "Eurodance") (55 . "Dream") (56 . "Southern Rock") (57 . "Comedy") (58 . "Cult") (59 . "Gangsta") (60 . "Top 40") (61 . "Christian Rap") (62 . "Pop/Funk") (63 . "Jungle") (64 . "Native American") (65 . "Cabaret") (66 . "New Wave") (67 . "Psychadelic") (68 . "Rave") (69 . "Showtunes") (70 . "Trailer") (71 . "Lo-Fi") (72 . "Tribal") (73 . "Acid Punk") (74 . "Acid Jazz") (75 . "Polka") (76 . "Retro") (77 . "Musical") (78 . "Rock & Roll") (79 . "Hard Rock") (80 . "Folk") (81 . "Folk-Rock") (82 . "National Folk") (83 . "Swing") (84 . "Fast Fusion") (85 . "Bebob") (86 . "Latin") (87 . "Revival") (88 . "Celtic") (89 . "Bluegrass") (90 . "Avantgarde") (91 . "Gothic Rock") (92 . "Progressive Rock") (93 . "Psychedelic Rock") (94 . "Symphonic Rock") (95 . "Slow Rock") (96 . "Big Band") (97 . "Chorus") (98 . "Easy Listening") (99 . "Acoustic") (100 . "Humour") (101 . "Speech") (102 . "Chanson") (103 . "Opera") (104 . "Chamber Music") (105 . "Sonata") (106 . "Symphony") (107 . "Booty Bass") (108 . "Primus") (109 . "Porn Groove") (110 . "Satire") (111 . "Slow Jam") (112 . "Club") (113 . "Tango") (114 . "Samba") (115 . "Folklore") (116 . "Ballad") (117 . "Power Ballad") (118 . "Rhythmic Soul") (119 . "Freestyle") (120 . "Duet") (121 . "Punk Rock") (122 . "Drum Solo") (123 . "A capella") (124 . "Euro-House") (125 . "Dance Hall"))) (define (process-genre g) (if (and (char=? (string-ref g 0) #\() (char=? (string-ref g (- (string-length g) 1)) #\))) (let* ((genre-number (string->number (substring g 1 (- (string-length g) 1)))) (genre-lookup (assoc genre-number genres))) (if genre-lookup (cdr genre-lookup) g)) g)) (define (remove-trailing-term-if-present str) (if (zero? (string-length str)) str (if (char=? (string-ref str (- (string-length str) 1)) #\x00) (substring str 0 (- (string-length str) 1)) str))) (define (read-utf16-be bytes rb) (let ((str (make-string (/ bytes 2)))) (dotimes (idx (/ bytes 2)) (let* ((ch1 (rb)) (ch2 (rb))) (string-set! str idx (integer->char (+ (* ch1 256) ch2))))) str)) (define (read-utf16-le bytes rb) (let ((str (make-string (/ bytes 2)))) (dotimes (idx (/ bytes 2)) (let* ((ch2 (rb)) (ch1 (rb))) (string-set! str idx (integer->char (+ (* ch1 256) ch2))))) str)) (define (load-id3-string encoding bytes rb) (case encoding ((0) (let ((str (make-string bytes))) (dotimes (idx bytes) (string-set! str idx (integer->char (rb)))) str)) ((1) ;; Look for a BOM (if (or (< bytes 2) (not (zero? (remainder bytes 2)))) (let ((str (make-string bytes))) (dotimes (idx bytes) (string-set! str idx (integer->char (rb)))) (sprintf "Invalid UTF16 [~S]" str)) (begin (let* ((ch1 (rb)) (ch2 (rb))) (cond ((and (= ch1 #xff) (= ch2 #xfe)) (read-utf16-le (- bytes 2) rb)) ((and (= ch1 #xfe) (= ch2 #xff)) (read-utf16-be (- bytes 2) rb)) (else (string-append (make-string 1 (integer->char (+ (* 256 ch1) ch2))) (read-utf16-be (- bytes 2) rb)))))))) (else (let ((str (make-string bytes))) (dotimes (idx bytes) (string-set! str idx (integer->char (rb)))) (sprintf "Unknown encoding [~S] [~S]" encoding str))))) (define (get-mp3-metadata path) ;; http://id3.org/id3v2-00 / http://id3.org/id3v2.3.0 (with-input-from-file path (lambda () (let/cc abort* (let* ((metadata '((dc:creator . #f) (dc:created . #f) (dc:contributor . #f) (set:title . #f) (set:index . #f) (set:size . #f) (superset:index . #f) (superset:size . #f))) (abort (lambda () (printf " ;; Could not read metadata\n") (abort* metadata))) (read-id3v22 (lambda () (debug "Reading v2.2\n") (let* ((flags (read-byte)) (unsync (not (zero? (bitwise-and flags 128)))) (compressed (not (zero? (bitwise-and flags 64)))) (size4 (read-byte)) (size3 (read-byte)) (size2 (read-byte)) (size1 (read-byte)) (size (+ size1 (* 128 size2) (* 16384 size3) (* 2097152 size4))) (*bytes-left* size) (rb ;; read-byte replacement (if unsync ;; Unsynchronised mode (let ((last-byte #f)) (lambda () (set! *bytes-left* (- *bytes-left* 1)) (let ((b (read-byte))) (when (and (= last-byte #xff) (= b #x00)) ;; Skip #x00 unsync byte (set! *bytes-left* (- *bytes-left* 1)) (set! b (read-byte))) (set! last-byte b) b))) ;; Normal mode (lambda () (set! *bytes-left* (- *bytes-left* 1)) (read-byte))))) (debug "Read header, size = ~S, unsync = ~S\n" size unsync) (let loop ((md (alist-copy metadata))) (debug "Bytes left: ~S\n" *bytes-left*) (when (< *bytes-left* 10) (abort* md)) (let* ((fid1 (rb)) (fid2 (rb)) (fid3 (rb)) (fid (list->string (list (integer->char fid1) (integer->char fid2) (integer->char fid3)))) (fsize3 (rb)) (fsize2 (rb)) (fsize1 (rb)) (fsize (+ fsize1 (* 256 fsize2) (* 65536 fsize3))) (encoding (if (char=? (integer->char fid1) #\T) (rb) 0)) (remaining-size (if (char=? (integer->char fid1) #\T) (- fsize 1) fsize)) (data (remove-trailing-term-if-present (load-id3-string encoding remaining-size rb)))) (debug "FID: ~S size: ~S [~S]\n" fid fsize data) (cond ((string=? fid "\x00\x00\x00") (abort* md)) ((string=? fid "TT2") (loop (alist-update! md 'dc:title data))) ((string=? fid "TP1") (loop (alist-update! md 'dc:creator data))) ((string=? fid "TCM") (loop (alist-update! md 'dc:creator data))) ((string=? fid "TYE") (loop (alist-update! md 'dc:created data))) ((string=? fid "TCO") (loop (alist-update! md 'dc:subject (process-genre data)))) ((string=? fid "TAL") (loop (alist-update! md 'set:title data))) ((string=? fid "TRK") (and-let* ((parts (string-split data "/")) ((= (length parts) 2)) (track (string->number (first parts))) (tracks (string->number (second parts)))) (loop (alist-update! (alist-update! md 'set:index track) 'set:size tracks))) (loop md)) ((string=? fid "TPA") (and-let* ((parts (string-split data "/")) ((= (length parts) 2)) (volume (string->number (first parts))) (volumes (string->number (second parts)))) (loop (alist-update! (alist-update! md 'superset:index volume) 'superset:size volumes))) (loop md)) (else (printf " ;; Unknown ID3 tag ~S=~S\n" fid (string-append (make-string 1 (integer->char encoding)) data)) (loop md)))))))) (read-id3v23 (lambda () (debug "Reading v2.3\n") (let* ((flags (read-byte)) (unsync (not (zero? (bitwise-and flags 128)))) (extended (not (zero? (bitwise-and flags 64)))) (experimental (not (zero? (bitwise-and flags 32)))) (size4 (read-byte)) (size3 (read-byte)) (size2 (read-byte)) (size1 (read-byte)) (size (+ size1 (* 128 size2) (* 16384 size3) (* 2097152 size4))) (*bytes-left* size) (rb ;; read-byte replacement (if unsync ;; Unsynchronised mode (let ((last-byte #f)) (lambda () (set! *bytes-left* (- *bytes-left* 1)) (let ((b (read-byte))) (when (and (= last-byte #xff) (= b #x00)) ;; Skip #x00 unsync byte (set! *bytes-left* (- *bytes-left* 1)) (set! b (read-byte))) (set! last-byte b) b))) ;; Normal mode (lambda () (set! *bytes-left* (- *bytes-left* 1)) (read-byte))))) (debug "Read header, size = ~S, unsync = ~S\n" size unsync) (when extended (debug "Reading extended header\n") (let* ((size4 (rb)) (size3 (rb)) (size2 (rb)) (size1 (rb)) (size (+ size1 (* 256 size2) (* 65536 size3) (* 16777216 size4))) (eflags1 (rb)) (eflags2 (rb))) (when (not (zero? (bitwise-and eflags1 128))) ;; Skip CRC (rb) (rb) (rb) (rb)))) (let loop ((md (alist-copy metadata))) (debug "Bytes left: ~S\n" *bytes-left*) (when (< *bytes-left* 10) (abort* md)) (let* ((fid1 (rb)) (fid2 (rb)) (fid3 (rb)) (fid4 (rb)) (fid (list->string (list (integer->char fid1) (integer->char fid2) (integer->char fid3) (integer->char fid4)))) (fsize4 (rb)) (fsize3 (rb)) (fsize2 (rb)) (fsize1 (rb)) (fsize (+ fsize1 (* 256 fsize2) (* 65536 fsize3) (* 16777216 fsize4))) (flags1 (rb)) (flags2 (rb)) (encoding (if (char=? (integer->char fid1) #\T) (rb) 0)) (remaining-size (if (char=? (integer->char fid1) #\T) (- fsize 1) fsize)) (data (remove-trailing-term-if-present (load-id3-string encoding remaining-size rb)))) (debug "FID: ~S size: ~S [~S]\n" fid fsize data) (cond ((string=? fid "\x00\x00\x00\x00") (abort* md)) ((string=? fid "TIT2") (loop (alist-update! md 'dc:title data))) ((string=? fid "TPE1") (loop (alist-update! md 'dc:creator data))) ((string=? fid "TCOM") (loop (alist-update! md 'dc:creator data))) ((string=? fid "TPE2") (loop (alist-update! md 'dc:contributor data))) ((string=? fid "TPE3") (loop (alist-update! md 'dc:contributor data))) ((string=? fid "TPE4") (loop (alist-update! md 'dc:contributor data))) ((string=? fid "TPUB") (loop (alist-update! md 'dc:publisher data))) ((string=? fid "TYER") (loop (alist-update! md 'dc:created data))) ((string=? fid "TCON") (loop (alist-update! md 'dc:subject (process-genre data)))) ((string=? fid "TALB") (loop (alist-update! md 'set:title data))) ((string=? fid "TRCK") (and-let* ((parts (string-split data "/")) ((= (length parts) 2)) (track (string->number (first parts))) (tracks (string->number (second parts)))) (loop (alist-update! (alist-update! md 'set:index track) 'set:size tracks))) (loop md)) ((string=? fid "TPOS") (and-let* ((parts (string-split data "/")) ((= (length parts) 2)) (volume (string->number (first parts))) (volumes (string->number (second parts)))) (loop (alist-update! (alist-update! md 'superset:index volume) 'superset:size volumes))) (loop md)) (else (printf " ;; Unknown ID3 tag ~S=~S\n" fid (if (< (string-length data) 160) data (string-append (substring data 0 100) "..."))) (loop md))))))))) (when (not (= (read-byte) #x49)) (abort)) (when (not (= (read-byte) #x44)) (abort)) (when (not (= (read-byte) #x33)) (abort)) (let ((version1 (read-byte)) (version2 (read-byte))) (case version1 ((2) (read-id3v22)) ((3) (read-id3v23)) (else (abort))))))))) (define (read-ogg-lacing) (let loop ((lacing 0) (bytes 1)) (let ((byte (read-byte))) (if (= byte 255) (loop (+ lacing byte) (+ bytes 1)) (values bytes (+ lacing byte)))))) (define (read-ogg-page abort) (when (not (= (read-byte) #x4f)) ;; OggS magic number (abort)) (when (not (= (read-byte) #x67)) (abort)) (when (not (= (read-byte) #x67)) (abort)) (when (not (= (read-byte) #x53)) (abort)) (when (not (= (read-byte) #x00)) ;; Version (abort)) (read-byte) ;; Header type (read-byte) ;; Granule position (read-byte) (read-byte) (read-byte) (read-byte) (read-byte) (read-byte) (read-byte) (read-byte) ;; Stream serial number (read-byte) (read-byte) (read-byte) (read-byte) ;; Page sequence number (read-byte) (read-byte) (read-byte) (read-byte) ;; Page checksum (read-byte) (read-byte) (read-byte) (let ((segments (read-byte))) (let ((segment-sizes (reverse (let loop ((segments-left segments) (segment-sizes '())) (if (zero? segments-left) segment-sizes (receive (bytes lacing) (read-ogg-lacing) (loop (- segments-left bytes) (cons lacing segment-sizes)))))))) (let loop ((ss segment-sizes) (segments '())) (if (null? ss) (reverse segments) (let ((data (read-string (car ss)))) (loop (cdr ss) (cons data segments)))))))) (define (read-ogg-u32) (let* ((s1 (read-byte)) (s2 (read-byte)) (s3 (read-byte)) (s4 (read-byte))) (+ s1 (* 256 s2) (* 65536 s3) (* 16777216 s4)))) (define (read-ogg-string) (let ((length (read-ogg-u32))) (read-string length))) (define (parse-ogg-comment str) (let ((sep-pos (string-index str #\=))) (if sep-pos (cons (string-take str sep-pos) (string-drop str (+ sep-pos 1))) str))) (define (parse-ogg-comments comments abort) (with-input-from-string comments (lambda () (let ((type (read-byte))) (unless (= type 3) (abort))) (unless (= (read-byte) #x76) #; "vorbis" (abort)) (unless (= (read-byte) #x6f) (abort)) (unless (= (read-byte) #x72) (abort)) (unless (= (read-byte) #x62) (abort)) (unless (= (read-byte) #x69) (abort)) (unless (= (read-byte) #x73) (abort)) (let ((vendor (read-ogg-string))) (debug "Vendor: [~S]\n" vendor)) (let loop ((comments-left (read-ogg-u32)) (comments '())) (if (zero? comments-left) comments (begin (loop (- comments-left 1) (cons (parse-ogg-comment (read-ogg-string)) comments)))))))) (define (get-ogg-metadata path) ;; http://xiph.org/vorbis/doc/framing.html ;; http://xiph.org/vorbis/doc/v-comment.html (with-input-from-file path (lambda () (let/cc abort* (let* ((metadata '((dc:creator . #f) (dc:created . #f) (dc:contributor . #f) (set:title . #f) (set:index . #f) (set:size . #f) (superset:index . #f) (superset:size . #f))) (abort (lambda () (printf " ;; Could not read metadata\n") (abort* metadata)))) ;; Skip header (read-ogg-page abort) ;; Read comment header (let* ((comments-page (read-ogg-page abort)) (comments-packet (first comments-page)) (comments (parse-ogg-comments comments-packet abort))) ;; Process comments (let loop ((md (alist-copy metadata)) (comments comments)) (if (null? comments) md (let ((key (car (car comments))) (value (cdr (car comments)))) (cond ((string-ci= key "artist") (loop (alist-update! md 'dc:creator value) (cdr comments))) ((string-ci= key "performer") (loop (alist-update! md 'dc:contributor value) (cdr comments))) ((string-ci= key "producer") (loop (alist-update! md 'dc:contributor value) (cdr comments))) ((string-ci= key "title") (loop (alist-update! md 'dc:title value) (cdr comments))) ((string-ci= key "album") (loop (alist-update! md 'set:title value) (cdr comments))) ((string-ci= key "tracknumber") (loop (alist-update! md 'set:index value) (cdr comments))) ((string-ci= key "totaltracks") (loop (alist-update! md 'set:size value) (cdr comments))) ((string-ci= key "discnumber") (loop (alist-update! md 'superset:index value) (cdr comments))) ((string-ci= key "totaldiscs") (loop (alist-update! md 'superset:size value) (cdr comments))) ((string-ci= key "date") (loop (alist-update! md 'dc:created value) (cdr comments))) ((string-ci= key "originaldate") (loop (alist-update! md 'dc:created value) (cdr comments))) ((string-ci= key "genre") (loop (alist-update! md 'dc:subject value) (cdr comments))) (else (printf " ;; Unknown Ogg tag ~S=~S\n" key value) (loop md (cdr comments))))))))))))) (define (get-pdf-metadata path) '((dc:creator . #f) (dc:subject . #f) (dc:description . #f) (dc:created . #f) (dc:identifier . #f) (dc:source . #f))) (define (get-ps-metadata path) '((dc:creator . #f) (dc:subject . #f) (dc:description . #f) (dc:created . #f) (dc:identifier . #f) (dc:source . #f))) (define (generate-manifest path) (if (directory? path) ;; Recurse therein (for-each (lambda (relname) (generate-manifest (make-pathname path relname))) (sort! (directory path) string<)) ;; Not a directory (when (included-file? path) (let* ((name (pathname-file path)) (*got-name* #f) (print-item (lambda (item) (if (and (eq? (car item) 'dc:title) (cdr item)) (set! *got-name* #f)) (if (cdr item) (printf " (~S = ~S)\n" (car item) (cdr item)) (printf " #;(~S = \"\")\n" (car item))))) (mime-type (extension->mimetype (string-append "." (or (pathname-extension path) ""))))) (printf "(object ~S\n" path) (printf " (filename = ~S)\n" (pathname-strip-directory path)) (printf " (dc:format = ~S)\n" mime-type) (for-each print-item *defines*) (newline) (cond ((string=? mime-type "image/jpeg") (for-each print-item (get-jpeg-metadata path))) ((string=? mime-type "audio/mpeg") (for-each print-item (get-mp3-metadata path))) ((string=? mime-type "audio/ogg") (for-each print-item (get-ogg-metadata path))) ((string=? mime-type "application/pdf") (for-each print-item (get-pdf-metadata path))) ((string=? mime-type "application/postscript") (for-each print-item (get-ps-metadata path))) (else (unless *got-name* (printf " (dc:title = ~S)\n" name)) (printf " #;(description = \"\")\n") (printf " ;; No metadata extraction available for ~S files" (pathname-extension path)))) (newline) (let ((stat (file-stat path))) (printf " (mtime = ~S)\n" (vector-ref stat 8)) (printf " (ctime = ~S)\n" (vector-ref stat 7))) (let ((size (path-size path))) (printf " (file-size = ~S))\n" size) (set! *total-bytes* (+ *total-bytes* size)) (set! *total-objects* (+ *total-objects* 1))) (newline))))) (define (usage) (with-output-to-port (current-error-port) (lambda () (printf "Usage: ugarit-manifest-maker [options...] [files...]\n") (printf " -e Exclude files matching pattern\n") (printf " -D = Define default metadata for all files\n"))) (exit 1)) (define things-to-import (args-fold (command-line-arguments) (list (option '(#\h "help") #f #f (lambda _ (usage))) (option '(#\e "exclude") #t #f (lambda (o n x vals) (push! x *excludes*) vals)) (option '(#\D "define") #t #f (lambda (o n x vals) (let ((pos (string-index x #\=))) (unless pos (usage)) (let ((key (string->symbol (string-take x pos))) (value (with-input-from-string (string-drop x (+ pos 1)) read))) (push! (cons key value) *defines*))) vals))) (lambda (o n x vals) (usage)) cons '())) (for-each generate-manifest things-to-import) (define (format-bytes b) (cond ((<= (* 1024 1024 1024 1024) b) (sprintf "~ATiB" (inexact->exact (round (/ b 1024 1024 1024 1024))))) ((<= (* 1024 1024 1024) b) (sprintf "~AGiB" (inexact->exact (round (/ b 1024 1024 1024))))) ((<= (* 1024 1024) b) (sprintf "~AMiB" (inexact->exact (round (/ b 1024 1024))))) ((<= (* 1024) b) (sprintf "~AKiB" (inexact->exact (round (/ b 1024))))) (else (sprintf "~AB" b)))) (printf ";;; TOTAL: ~S objects / ~A\n" *total-objects* (format-bytes *total-bytes*))