(module ugarit-files (init-file-subsystem store-file! write-file-contents fold-file unlink-file! store-directory! unlink-directory! extract-directory! make-dirent make-dirent-from-sexpr dirent? dirent-subnode-key dirent-name dirent-type dirent-props fold-leaf-object extract-object! ) (import scheme) (import chicken) (use ugarit-core) (use ugarit-streams) (use directory-rules) (use matchable) (use posix) (use posix-extras) (use srfi-1) (use srfi-4) (use extras) (use files) (use miscmacros) (use data-structures) (use sql-de-lite) (define-record-type vault-subsys (make-vault-subsys enabled? get-query set-query) vault-subsys? (enabled? vault-subsys-file-cache-enabled?) (get-query vault-subsys-get-query) (set-query vault-subsys-set-query)) (define (init-file-subsystem cache file-cache-enabled?) (when file-cache-enabled? (ensure-table cache "files" "CREATE TABLE files (path TEXT PRIMARY KEY, mtime INTEGER, size INTEGER, key TEXT);")) (if file-cache-enabled? (make-vault-subsys #t (sql cache "SELECT key FROM files WHERE path = ? AND mtime = ? AND size = ?") (sql cache "INSERT OR REPLACE INTO files (path,mtime,size,key) VALUES (?,?,?,?)")) (make-vault-subsys #f #f #f))) (define (vault-file-cache? vault) (vault-subsys-file-cache-enabled? (vault-file-subsystem vault))) (define (vault-file-cache-get-query vault) (vault-subsys-get-query (vault-file-subsystem vault))) (define (vault-file-cache-set-query vault) (vault-subsys-set-query (vault-file-subsystem vault))) (define (file-cache-put! vault file-path mtime size key) (exec (vault-file-cache-set-query vault) file-path mtime size key) (vault-cache-updated! vault)) (define (file-cache-get vault file-path mtime size) (let ((data (query fetch (vault-file-cache-get-query vault) file-path mtime size))) (if (pair? data) (car data) #f))) ;; FILE STORAGE ;; Files are stored as either: ;; 1) A direct block of type "f" containing the file data ;; 2) An indirect block of type "fi" that's a keystream of keys of direct or indirect blocks ;; Returns key and reused? (define (store-file! vault file-path file-stat) (let* ((store-file-without-caching! (lambda () ;; Actually upload the file ;; IDEA: memory-map the file in 1MB chunks, and ;; copy them into u8vectors? (let* ((fd (file-open file-path (+ open/rdonly open/binary))) (blocksize (vault-max-block-size vault)) (*buffer* (make-blob blocksize)) (ksw (make-key-stream-writer* vault 'fi))) (letrec ((upload-file (lambda () (let* ((result (file-read fd blocksize *buffer*)) (bytes-read (cadr result))) (if (not (zero? bytes-read)) (begin (job-progress! 'file-block-start #f #f bytes-read) (let-values (((data-key data-reused?) (vault-store-block! vault (subu8vector (blob->u8vector/shared *buffer*) 0 bytes-read) 'f))) ((key-stream-writer-write! ksw) data-key data-reused?) (job-progress! 'file-block-end #f #f #f) (upload-file))) (begin (file-close fd) ((key-stream-writer-finish! ksw)))))))) (upload-file))))) (store-file-and-cache! (lambda (mtime size) (let-values (((key reused?) (store-file-without-caching!))) (file-cache-put! vault file-path mtime size key) (values key reused?))))) (check-vault-writable vault) (job-progress! 'file-start file-path #f (vector-ref file-stat 5)) ;; Firstly, if we have an mtime cache, use it to see if the file is already in the vault ;; The cache is keyed on file paths, and the contents are ;; sexprs of the form (mtime hash) (receive (key reused?) (if (vault-file-cache? vault) (let* ((mtime (vector-ref file-stat 8)) ; Should have used and-let* (size (vector-ref file-stat 5)) (cache-result (file-cache-get vault file-path mtime size))) (if (and cache-result (vault-exists? vault cache-result)) (begin (inc! (job-file-cache-hits (current-job))) (inc! (job-file-cache-bytes (current-job)) size) (values cache-result #t)) ; Found in cache! Woot! (store-file-and-cache! mtime size))) ; not in cache (store-file-without-caching!)) ; no mtime cache (begin (job-progress! 'file-end #f #f #f) (values key reused?))))) ;; Call kons on each u8vector block of the file in turn ;; with an accumulator that starts as knil as a second argument (define (fold-file vault key kons knil) (fold-key-stream vault key 'fi (lambda (key type acc) (kons (vault-get vault key type) acc)) knil)) ;; Write the contents of the file to the supplied posix fd (define (write-file-contents vault key fd) (fold-file vault key (lambda (block acc) (begin (job-progress! 'file-block-start #f #f (u8vector-length block)) (file-write fd (u8vector->blob/shared block)) (job-progress! 'file-block-end #f #f #f) #f)) #f)) (define (unlink-file! vault key) (check-vault-unlinkable vault) (unlink-key-stream! vault key 'fi (lambda (vault key type) (vault-unlink! vault key)))) ;; DIRECTORY STORAGE ;; Directories are stored as either; ;; 1) A direct block of type "d" containing a list of file/directory entries, each of which is an s-expr ;; The car of the s-expr is the file name ;; The cadr is a type symbol - file, dir, symlink, chardev, blockdev, fifo, socket ;; The cddr is an alist of other properties ;; Regular files have a 'content entry containing a key, for example. ;; Also look out for 'mode 'uid 'gid 'atime 'mtime 'ctime ;; Symlinks have 'target ;; Directories have 'content, too ;; Files with streams or forks or whatnot can have more than one content key, of course... ;; 2) An indirect block of type "di" that's a keystream of keys to direct or indirect blocks ;; Look for a .ugarit file in the given directory ;; If one is found, return its contents (define (read-local-rules vault path) (let ((conf-file (make-pathname path ".ugarit"))) (if (file-exists? conf-file) (with-input-from-file conf-file read-file) '()))) ;; Do the rules list say to ignore the file? ;; Statements towards the head of the list take priority ;; And we want to accept the most recent 'ignore' or 'include', ;; defaulting to 'include' if neither is found (define (rules-say-ignore rules) (match rules ('() #f) ((('exclude) . _) #t) ((('include) . _) #f) ((_ . more) (rules-say-ignore more)))) ;; Store a directory entry for the given filename, which is in the ;; directory identified by path. The rules-checker is consulted to ;; decide what to do, and the resulting directory entry is written to ;; ssw (an sexpr stream writer) in sexpr format; this procedure ;; and make-dirent-from-sexpr need to agree on that format! ;; Returns the number of files and bytes in the entry. (define (store-directory-entry! vault path filename ssw rules-checker) (let* ((file-path (make-pathname path filename)) (stats (file-stat file-path #t)) (mode (bitwise-and (vector-ref stats 1) (bitwise-not stat/ifmt))) (uid (vector-ref stats 3)) (gid (vector-ref stats 4)) (atime (vector-ref stats 6)) (ctime (vector-ref stats 7)) (mtime (vector-ref stats 8)) (type (bitwise-and (vector-ref stats 1) stat/ifmt)) (standard-file-attributes (list (cons 'mode mode) (cons 'uid uid) (cons 'gid gid) (cons 'mtime mtime))) (file-rules (object-matches filename rules-checker))) (when (job-store-ctime? (current-job)) (set! standard-file-attributes (cons (cons 'ctime ctime) standard-file-attributes))) (when (job-store-atime? (current-job)) (set! standard-file-attributes (cons (cons 'atime atime) standard-file-attributes))) (if (and (job-use-rules? (current-job)) (rules-say-ignore file-rules)) (values 0 0) (cond ((eq? type stat/ifsock) (job-log! 'warning file-path "Ignoring a socket") (values 0 0)) ((eq? type stat/ifreg) (let-values (((content-key content-reused?) (store-file! vault file-path stats))) ((sexpr-stream-writer-write! ssw) (append (list filename 'file (cons 'contents content-key) (cons 'size (vector-ref stats 5))) standard-file-attributes) (list (cons content-key content-reused?))) (values 1 (vector-ref stats 5)))) ((eq? type stat/ifdir) (let-values (((content-key content-reused? files bytes) (store-directory! vault file-path))) ((sexpr-stream-writer-write! ssw) (append (list filename 'dir (cons 'contents content-key) (cons 'files files) (cons 'size bytes)) standard-file-attributes) (list (cons content-key content-reused?))) (values files bytes))) ((eq? type stat/iflnk) (let ((target (read-symbolic-link file-path))) (job-progress! 'file-start file-path #f (string-length target)) (job-progress! 'file-block-start #f #f (string-length target)) ((sexpr-stream-writer-write! ssw) (append (list filename 'symlink (cons 'target target) (cons 'size (string-length target))) standard-file-attributes) '()) (job-progress! 'file-block-end #f #f #f) (job-progress! 'file-end #f #f #f) (values 1 (string-length target)))) ((eq? type stat/ifblk) (let ((devnum (vector-ref stats 10))) (job-progress! 'file-start path #f 0) (job-progress! 'file-block-start #f #f 0) ((sexpr-stream-writer-write! ssw) (append (list filename 'block-device (cons 'number devnum)) standard-file-attributes) '()) (job-progress! 'file-block-end #f #f #f) (job-progress! 'file-end #f #f #f) (values 1 0))) ((eq? type stat/ifchr) (let ((devnum (vector-ref stats 10))) (job-progress! 'file-start path #f 0) (job-progress! 'file-block-start #f #f 0) ((sexpr-stream-writer-write! ssw) (append (list filename 'character-device (cons 'number devnum)) standard-file-attributes) '()) (job-progress! 'file-block-end #f #f #f) (job-progress! 'file-end #f #f #f) (values 1 0))) ((eq? type stat/ififo) (job-progress! 'file-start path #f 0) (job-progress! 'file-block-start #f #f 0) ((sexpr-stream-writer-write! ssw) (append (list filename 'fifo) standard-file-attributes) '()) (job-progress! 'file-block-end #f #f #f) (job-progress! 'file-end #f #f #f) (values 1 0)) (else ; WTF? (job-log! 'error file-path "Unable to store object of unknown type") (values 0 0)))))) ;; Store a directory ;; Returns the usual key and reused? values, then the total files and ;; bytes in the directory (define (store-directory! vault path) (call-with-context (read-local-rules vault path) path (lambda () (check-vault-writable vault) (let ((ssw (make-sexpr-stream-writer* vault 'd 'di)) (rules-checker (make-filesystem-object-pattern-checker path)) (file-list (sort! (directory path #t) string>?))) (job-progress! 'dir-start path (length file-list) #f) (let ((sizes (fold (lambda (filename sizes) (condition-case (receive (files bytes) (store-directory-entry! vault path filename ssw rules-checker) (cons (+ files (car sizes)) (+ bytes (cdr sizes)))) (exn (exn i/o file) (job-log! 'error (make-pathname path filename) (sprintf "Unable to store into the vault (~a)" ((condition-property-accessor 'exn 'message "Unknown error") exn))) sizes))) (cons 0 0) file-list))) (receive (key reused?) ((sexpr-stream-writer-finish! ssw)) (begin (job-progress! 'dir-end #f #f #f) (values key reused? (car sizes) (cdr sizes))))))))) (define (unlink-directory! vault key) (check-vault-unlinkable vault) (unlink-sexpr-stream! vault key 'd 'di (lambda (dirent) (let ((type (cadr dirent)) (name (car dirent)) (props (cddr dirent))) (cond ((eq? type 'file) (unlink-file! vault (cdr (assq 'contents props)))) ((eq? type 'dir) (unlink-directory! vault (cdr (assq 'contents props))))))))) (define (set-standard-file-metadata! vault path props) (let ((mode (assq 'mode props)) (uid (assq 'uid props)) (gid (assq 'gid props)) (mtime (assq 'mtime props)) (atime (assq 'atime props))) (if (or uid gid) (handle-exceptions exn (job-log! 'warning path "Unable to set the uid/gid") (change-file-owner path (if uid (cdr uid) (current-user-id)) (if gid (cdr gid) (current-group-id))))) (if mode (change-file-mode path (cdr mode))) (if (or mtime atime) (change-file-times path (if atime (cdr atime) (current-seconds)) (if mtime (cdr mtime) (current-seconds)))) (void))) (define (extract-file! vault props path) (let ((size (assq 'size props))) (if size (job-progress! 'file-start path #f (cdr size)) (job-progress! 'file-start path #f #f))) (let ((contents-key (cdr (assq 'contents props))) ;; Initialise perms to 0000 while we fill it up (fd (file-open path (+ open/wronly open/creat) 0))) (write-file-contents vault contents-key fd) (file-close fd)) (set-standard-file-metadata! vault path props) (job-progress! 'file-end #f #f #f)) (define (extract-inline! vault props path) (let ((size (assq 'size props))) (if size (job-progress! 'file-start path #f (cdr size)) (job-progress! 'file-start path #f #f))) (let ((contents (cdr (assq 'text props))) ;; Initialise perms to 0000 while we fill it up (fd (file-open path (+ open/wronly open/creat) 0))) (file-write fd contents) (file-close fd)) (set-standard-file-metadata! vault path props) (job-progress! 'file-end #f #f #f)) (define (extract-subdirectory! vault props path) (unless (directory? path) (create-directory path) ;; Initialise perms to 0700 while we fill it up (change-file-mode path (+ perm/irusr perm/iwusr perm/ixusr))) (let ((contents-key (cdr (assq 'contents props)))) (extract-directory! vault contents-key path) (set-standard-file-metadata! vault path props))) (define (extract-symlink! vault props path) (let ((target (cdr (assq 'target props))) (mode (assq 'mode props)) (uid (assq 'uid props)) (gid (assq 'gid props)) (mtime (assq 'mtime props)) (atime (assq 'atime props))) (job-progress! 'file-start path #f (string-length target)) (job-progress! 'file-block-start #f #f (string-length target)) (create-symbolic-link target path) (job-progress! 'file-block-end #f #f #f) ;; Alas, there is no portable way to set the atime/mtime on a link. ;; I think, somehow, we will manage to live our lives without the atime and mtime on links... (if mode (handle-exceptions exn (job-log! 'warning path "Unable to set the mode of a link") (change-link-mode path (cdr mode)))) (job-progress! 'file-end #f #f #f) (if (or uid gid) (handle-exceptions exn (job-log! 'warning path "Unable to set the uid/gid of a link") (change-link-owner path (if uid (cdr uid) (current-user-id)) (if gid (cdr gid) (current-group-id))))))) (define (extract-fifo! vault props path) (job-progress! 'file-start path #f 0) (create-fifo path) (set-standard-file-metadata! vault path props) (job-progress! 'file-end #f #f #f)) (define (extract-block-device! vault props path) (let ((number (cdr (assq 'number props)))) (handle-exceptions exn (job-log! 'warning path "Unable to recreate block device") (job-progress! 'file-start path #f 0) (create-special-file path stat/ifblk number) (set-standard-file-metadata! vault path props)) (job-progress! 'file-end #f #f #f))) (define (extract-character-device! vault props path) (let ((number (cdr (assq 'number props)))) (handle-exceptions exn (job-log! 'warning path "Unable to recreate character device") (job-progress! 'file-start path #f 0) (create-special-file path stat/ifchr number) (set-standard-file-metadata! vault path props)) (job-progress! 'file-end #f #f #f))) (define (extract-directory! vault key target-path) (job-progress! 'dir-start target-path #f #f) (fold-sexpr-stream vault key 'd 'di (lambda (dirent-sexpr acc) (let ((dirent (make-dirent-from-sexpr dirent-sexpr))) (condition-case (begin (extract-object! vault dirent target-path #t)) (exn (exn i/o file) (job-log! 'error (make-pathname target-path (dirent-name dirent)) (sprintf "Unable to extract from the vault (~a)" ((condition-property-accessor 'exn 'message "Unknown error") exn)))))) (void)) (void)) (job-progress! 'dir-end #f #f #f) (void)) (define (extract-object! vault dirent target-path use-object-name?) (let* ((type (dirent-type dirent)) (name (dirent-name dirent)) (props (dirent-props dirent)) (target-name (if use-object-name? (make-pathname target-path name) target-path))) (cond ((eq? type 'file) (extract-file! vault props target-name)) ((eq? type 'inline) (extract-inline! vault props target-name)) ((eq? type 'tag) (signal (make-property-condition 'exn 'location 'extract-object! 'dirent dirent 'message "You can't extract an entire tag."))) ((eq? type 'archive-history) (signal (make-property-condition 'exn 'location 'extract-object! 'dirent dirent 'message "You can't extract an entire archive history."))) ((eq? type 'import) (signal (make-property-condition 'exn 'location 'extract-object! 'dirent dirent 'message "You can't extract an entire archive import."))) ((eq? type 'import-manifest) ;; IDEA: Although it might be useful to support this (signal (make-property-condition 'exn 'location 'extract-object! 'dirent dirent 'message "You can't extract an entire archive manifest."))) ((eq? type 'root) (signal (make-property-condition 'exn 'location 'extract-object! 'dirent dirent 'message "You can't extract an entire vault."))) ((eq? type 'snapshot) (signal (make-property-condition 'exn 'location 'extract-object! 'dirent dirent 'message "You can't extract a snapshot. Try extracting the \"contents\" inside it."))) ((eq? type 'dir) (extract-subdirectory! vault props target-name)) ((eq? type 'symlink) (extract-symlink! vault props target-name)) ((eq? type 'fifo) (extract-fifo! vault props target-name)) ((eq? type 'block-device) (extract-block-device! vault props target-name)) ((eq? type 'character-device) (extract-character-device! vault props target-name)) (else (job-log! 'error (make-pathname target-path name) (sprintf "Unable to extract an object of unknown type ~A" type)))))) (define (fold-leaf-object vault dirent kons knil) (let ((type (dirent-type dirent)) (name (dirent-name dirent)) (props (dirent-props dirent))) (cond ((eq? type 'file) (fold-file vault (cdr (assq 'contents props)) kons knil)) ((eq? type 'inline) (kons (blob->u8vector/shared (string->blob (cdr (assq 'text props)))) knil)) (else (job-log! 'error name (sprintf "Unable to extract an object of non-leaf type ~A" type)))))) (define-record-type dirent (make-dirent subnode-key name type props) dirent? (subnode-key dirent-subnode-key (setter dirent-subnode-key)) (name dirent-name (setter dirent-name)) (type dirent-type (setter dirent-type)) (props dirent-props (setter dirent-props))) (define-record-printer (dirent d out) (fprintf out "#" (dirent-name d) (dirent-subnode-key d) (dirent-type d) (dirent-props d))) ;; Make a dirent from the sexpr format used to store them in sexpr ;; streams representing directories, as generated by store-directory-entry (define (make-dirent-from-sexpr s) (let ((name (car s)) (type (cadr s)) (props (cddr s))) (make-dirent (cond ((and (eq? type 'dir) (assq 'contents props)) (cdr (assq 'contents props))) (else #f)) name type props))) )