(module ugarit-archive (init-archive-subsystem tag-archive-import! make-archive-entry archive-entry? archive-entry-key archive-entry-key-reused? archive-entry-alist archive-entry-import archive-entry-property archive-entry-guessed-extension archive-import? archive-import-tag archive-import-key archive-import-alist update-archive-cache-for-tag! get-import-parents get-import-children get-import-props search-archive list-archive-properties list-archive-property-values archive-get-entry) (import scheme) (import chicken) (import extras) (import ports) (use ugarit-core) (use ugarit-streams) (use ugarit-mime) (use sql-de-lite) (use ssql) (use matchable) (use srfi-1) (use srfi-13) (use srfi-69) ;; FIXME: Create stored procedures where applicable. ;; FIXME: Create indices where applicable. (define-record-type vault-subsys (make-vault-subsys) vault-subsys?) (define (init-archive-subsystem cache) (ensure-table cache "archive_imports" "CREATE TABLE archive_imports (tag TEXT, import_id INTEGER PRIMARY KEY, key TEXT);") (ensure-table cache "archive_import_props" "CREATE TABLE archive_import_props (import_id INTEGER, prop TEXT, value TEXT);") (ensure-table cache "archive_objects" "CREATE TABLE archive_objects (object_id INTEGER PRIMARY KEY, key TEXT, import_id INTEGER);") (ensure-table cache "archive_entries" "CREATE TABLE archive_entries (object_id INTEGER, import_id INTEGER, prop TEXT, value TEXT);")) (define-record-type archive-import (make-archive-import tag key alist) archive-import? (tag archive-import-tag) (key archive-import-key) (alist archive-import-alist)) (define-record-type archive-entry (make-archive-entry* key key-reused? alist import) archive-entry? (key archive-entry-key) ; Key of archived object (key-reused? archive-entry-key-reused?) ; Key reused? (alist archive-entry-alist) ; Alist of properties - allows duplicated keys! (import archive-entry-import)) ; Make an archive entry with no import (define (make-archive-entry key key-reused? alist) (make-archive-entry* key key-reused? alist #f)) ; Return a list of all values for the given property symbol ; in the given archive entry's alist (define (archive-entry-property ae prop) (let loop ((props (archive-entry-alist ae)) (results '())) (cond ((null? props) results) ((eq? (caar props) prop) (loop (cdr props) (cons (cdar props) results))) (else (loop (cdr props) results))))) ; Try and find a file extension ; 1) If there are names with an extension, use that ; 2) Otherwise, guess from mime types (define (archive-entry-guessed-extension ae) (let ((filenames (archive-entry-property ae 'filename)) (mime-types (archive-entry-property ae 'dc:format))) (cond ((not (null? filenames)) (extension-from-filename (car filenames))) ((not (null? mime-types)) (let* ((maybe-guessed-extensions (map mimetype->extension mime-types)) (guessed-extensions (filter (lambda (x) x) maybe-guessed-extensions))) (car guessed-extensions))) (else "")))) (define (store-archive-entries! vault entries) (let ((ssw (make-sexpr-stream-writer* vault 'a 'ai))) (for-each (lambda (ae) ((sexpr-stream-writer-write! ssw) (cons (archive-entry-key ae) (archive-entry-alist ae)) (list (cons (archive-entry-key ae) (archive-entry-key-reused? ae))))) entries) ; Returns (values key reused?) ((sexpr-stream-writer-finish! ssw)))) ; Creates an archive import block (archive) ; Which contains zero or more references to previous archive imports, ; import-wide properties, and the key of an archive data (ad/adi) ; sexpr-stream of serialised archive entries. ; Tag is a tag name symbol ; Updates is a list of archive entries ; Import-props is an alist (with duplicates) ; of properties for the whole import (define (tag-archive-import! vault tag updates import-props job) (check-vault-writable vault) (vault-lock-tag! vault tag) (receive (contents-key contents-reused?) ; Write sexpr stream of updates (store-archive-entries! vault updates) ; Construct archive import block (receive (job-log-key job-log-key-reused?) (if job (store-job-log! vault job) (values #f #f)) (let* ((previous (vault-tag vault tag)) (archive-block (append (if job (list (cons 'stats (job-stats-alist job)) (cons 'log job-log-key)) '()) (list (cons 'mtime (current-seconds)) (cons 'contents contents-key)) import-props (if (tag? previous) (list (cons 'previous (tag-key previous))) '())))) (when (tag? previous) (when (not (eq? (tag-type previous) 'archive)) (error (sprintf "It is illegal to add an archive import to the tag ~A, which is of type ~A" tag (tag-type previous))))) (receive (import-key import-reused?) ; Store archive import block (store-sexpr! vault archive-block 'archive ; Refer to the contents, but not to ; the previous import, as we're ; overwriting the tag that points to ; it which is an implicit unlink which ; will just cancel out the link (append (list (cons contents-key contents-reused?)) (if job-log-key (list (cons job-log-key job-log-key-reused?)) '()))) ; Update tag to point to it (vault-flush! vault) (vault-set-tag! vault (make-tag tag 'archive import-key)) (vault-unlock-tag! vault tag) ; Unlikely, but not impossible: (when import-reused? (vault-link! vault import-key)) ; Return import key for posterity import-key))))) (define (find-object db object-key import-id) (let ((r (query fetch (sql db "SELECT object_id FROM archive_objects WHERE key = ?;") object-key))) (if (pair? r) (begin (let ((object-id (car r))) ; Update last-import pointer (exec (sql db "UPDATE archive_objects SET import_id = ? WHERE object_id = ?;") import-id object-id) object-id)) (begin (exec (sql db "INSERT INTO archive_objects (key, import_id) VALUES (?,?);") object-key import-id) (last-insert-rowid db))))) (define (cache-archive-entries! vault db import-id key) (fold-sexpr-stream vault key 'a 'ai (lambda (entry-sexpr acc) (let* ((object-key (car entry-sexpr)) (object-props (cdr entry-sexpr)) (object-id (find-object db object-key import-id))) (for-each (lambda (ae) (exec (sql db "INSERT INTO archive_entries (object_id, import_id, prop, value) VALUES (?,?,?,?);") object-id import-id (symbol->string (car ae)) (serialise (cdr ae)))) object-props))) (void))) (define (cache-import*! vault tag-name key) (let ((db (vault-cache vault))) (when (zero? (car (query fetch (sql db "SELECT COUNT(*) FROM archive_imports WHERE tag = ? AND key = ?;") tag-name key))) (exec (sql db "INSERT INTO archive_imports (tag,key) VALUES (?,?);") tag-name key) (let ((import-id (last-insert-rowid db)) (import-block (read-sexpr vault key 'archive))) (for-each (lambda (ie) ; For each entry in the property alist... (exec (sql db "INSERT INTO archive_import_props (import_id, prop, value) VALUES (?,?,?);") import-id (symbol->string (car ie)) (serialise (cdr ie)))) import-block) ; Cache all parent references (recurses here) (for-each (lambda (ie) (when (eq? (car ie) 'previous) (cache-import*! vault tag-name (cdr ie)))) import-block) ; Cache archive entries from contents (let ((contents-key (assq 'contents import-block))) (when (pair? contents-key) (cache-archive-entries! vault db import-id (cdr contents-key)))))))) (define (cache-import! vault tag-name key) (let ((db (vault-cache vault))) (when (zero? (car (query fetch (sql db "SELECT COUNT(*) FROM archive_imports WHERE tag = ? AND key = ?;") tag-name key))) (vault-cache-flush! vault) ; Start a new txn (cache-import*! vault tag-name key) (vault-cache-flush! vault) ; Flush that txn ))) ; Compile a query into an ssql where clause (define (compile-query vault tag-name query) (match query (#t '(= 1 1)) (#f '(= 1 0)) (('and) '(= 1 1)) (('and . stuff) (cons 'and (map (cut compile-query vault tag-name <>) stuff))) (('or) '(= 1 0)) (('or . stuff) (cons 'or (map (cut compile-query vault tag-name <>) stuff))) (('not stuff) (list 'not (compile-query vault tag-name stuff))) (('= 'key value) (unless (string? value) (error "Vault keys must be strings" value)) `(= archive_objects.key ,value)) (('= ('$import prop) value) (unless (symbol? prop) (error "Property names must be symbols" prop)) `(in archive_objects.object_id (select archive_objects.object_id (from archive_objects archive_imports archive_import_props) (where (and (= archive_import_props.import_id archive_objects.import_id) (= archive_imports.import_id archive_import_props.import_id) (= archive_imports.tag ,tag-name) (= archive_import_props.prop ,(symbol->string prop)) (= archive_import_props.value ,(serialise value))))))) (('= ('$ prop) value) (unless (symbol? prop) (error "Property names must be symbols" prop)) `(in archive_objects.object_id (select archive_entries.object_id (from archive_entries archive_imports) (where (and (= archive_entries.import_id archive_objects.import_id) (= archive_entries.import_id archive_imports.import_id) (= archive_imports.tag ,tag-name) (= archive_entries.prop ,(symbol->string prop)) (= archive_entries.value ,(serialise value))))))) (_ (error (sprintf "Unknown filter expression ~S" query) query)) ) ) (define (update-archive-cache-for-tag! vault tag-name) (let ((tag (vault-tag vault tag-name))) (when tag (cache-import! vault tag-name (tag-key tag))))) (define (make-object-ssql tag-name filter-ssql) `(select (distinct (columns archive_objects.key archive_objects.object_id archive_objects.import_id)) (from (join inner archive_imports archive_objects (on (= archive_imports.import_id archive_objects.import_id)))) (where (and (= archive_imports.tag ,tag-name) ,filter-ssql)))) ; Returns one archive entry object, or #f (define (archive-get-entry vault tag-name key) (let* ((db (vault-cache vault)) (final-ssql `(select (columns prop value) (from (join inner (join inner archive_imports archive_objects (on (= archive_imports.import_id archive_objects.import_id))) archive_entries (on (= archive_objects.object_id archive_entries.object_id)))) (where (and (= archive_imports.tag ,tag-name) (= archive_objects.key ,key))) (order (desc prop) value))) (final-sql (ssql->sql #f final-ssql)) (results (query fetch-all (sql db final-sql)))) (if (pair? results) (let ((alist (map (lambda (row) (let ((key (first row)) (prop (second row))) (cons key prop))) results))) (make-archive-entry key #t alist)) ; else case, nothing found #f))) (define (get-archive-import vault tag-name import-key) (let* ((db (vault-cache vault)) (query-ssql `(select (columns prop value) (from archive_import_props archive_imports) (where (and (= archive_imports.tag ,tag-name) (= archive_imports.key ,import-key) (= archive_import_props.import_id archive_imports.import_id))))) (query-sql (ssql->sql #f query-ssql)) (results (query fetch-all (sql db query-sql))) (props (map (lambda (row) (let ((prop (string->symbol (first row))) (value (deserialise (second row)))) (cons prop value))) results))) (make-archive-import tag-name import-key props))) ; Returns a list of archive entry objects (define (search-archive vault tag-name filter-expr) (update-archive-cache-for-tag! vault tag-name) ; Now construct and run the query against the cache (let* ((db (vault-cache vault)) ; filter-ssql is a WHERE clause fragment (filter-ssql (compile-query vault tag-name filter-expr)) ; object-ssql wraps that in a query on the right tables, ; returning object_key, object_id, import_id (object-ssql (make-object-ssql tag-name filter-ssql)) (final-ssql `(select (columns archive_imports.key x.key prop value) (from (join left (join left (as ,object-ssql x) archive_entries (on (and (= x.object_id archive_entries.object_id) (= x.import_id archive_entries.import_id)))) archive_imports (on (= archive_imports.import_id x.import_id)))) (order x.key (desc prop) value))) (final-sql (ssql->sql #f final-ssql)) (results (query fetch-all (sql db final-sql))) (prop-hash (make-hash-table)) ; key->alist (import-key-hash (make-hash-table)) ; import key->import object (import-hash (make-hash-table))) ; key->import key ; Gather all properties for each key in a hash ; IDEA: Use the fact we order by x.key to do this without ; a hash... (for-each (lambda (row) (let ((import-key (first row)) (key (second row)) (prop (string->symbol (third row))) (value (deserialise (fourth row)))) (if (hash-table-exists? prop-hash key) (begin (hash-table-set! prop-hash key (cons (cons prop value) (hash-table-ref prop-hash key)))) (begin (hash-table-set! import-hash key import-key) (hash-table-set! import-key-hash import-key #f) (hash-table-set! prop-hash key (cons (cons prop value) '())))))) results) ; fetch imports (hash-table-for-each import-key-hash (lambda (key _) (hash-table-set! import-key-hash key (get-archive-import vault tag-name key)))) ; Convert into archive-entry records (hash-table-map prop-hash (lambda (key props) (make-archive-entry* key #t props (hash-table-ref import-key-hash (hash-table-ref import-hash key))))))) ; Given a filter expression, return all property names used by objects ; matching it, in alphabetical order (define (list-archive-properties vault tag-name filter-expr) (update-archive-cache-for-tag! vault tag-name) (let* ((db (vault-cache vault)) (filter-ssql (compile-query vault tag-name filter-expr)) (object-ssql (make-object-ssql tag-name filter-ssql)) (final-ssql `(select (distinct prop) (from (join left (as ,object-ssql x) archive_entries (on (and (= x.object_id archive_entries.object_id) (= x.import_id archive_entries.import_id))))) (order prop))) (final-sql (ssql->sql #f final-ssql))) (map (lambda (result) (string->symbol (car result))) (query fetch-all (sql db final-sql))))) ; Given a filter expression and a property name, return all values of ; that property used by objects matching the filter in popularity ; order (define (list-archive-property-values vault tag-name filter-expr prop-name) (update-archive-cache-for-tag! vault tag-name) (let* ((db (vault-cache vault)) (filter-ssql (compile-query vault tag-name filter-expr)) (object-ssql (make-object-ssql tag-name filter-ssql)) (final-ssql `(select (columns value (count *)) (from (join left (as ,object-ssql x) archive_entries (on (and (= x.object_id archive_entries.object_id) (= x.import_id archive_entries.import_id))))) (where (= prop ,(symbol->string prop-name))) (group value) (order (desc (count *))))) (final-sql (ssql->sql #f final-ssql))) (map (lambda (result) (deserialise (car result))) (query fetch-all (sql db final-sql))))) (define (get-import-parents vault tag-name key) (let* ((db (vault-cache vault)) (r (query fetch-all (sql db "SELECT value FROM archive_import_props WHERE import_id IN (SELECT import_id FROM archive_imports WHERE tag = ? AND key = ?) AND prop = 'previous';") tag-name key))) (map (lambda (x) (deserialise (car x))) r))) (define (get-import-children vault tag-name key) (let* ((db (vault-cache vault)) (r (query fetch-all (sql db "SELECT key FROM archive_imports WHERE import_id IN (SELECT import_id FROM archive_import_props WHERE prop = 'previous' AND value = ?) AND tag = ?") (serialise key) tag-name))) (map car r))) (define (get-import-props vault tag-name key) (let* ((db (vault-cache vault)) (r (query fetch-all (sql db "SELECT prop, value FROM archive_import_props WHERE import_id IN (SELECT import_id FROM archive_imports WHERE tag = ? AND key = ?);") tag-name key))) (map (lambda (x) (cons (string->symbol (first x)) (deserialise (second x)))) r))) ;; IDEA: list-import-properties, list-import-property-values, as above but for ($import ...) thingies )