;; pathfinder egg ;; Author: Jim Ursetto, Ursetto Consulting, Inc. ;; See LICENSE for copyright ;; Acknowledgements: Some design inspiration as well as handling of compound ;; extensions was taken from the MIT-licensed Hike library by ;; Sam Stephenson, https://github.com/sstephenson/hike (module pathfinder (make-pathfinder pathfinder-paths pathfinder-root pathfinder-test pathfinder-matcher current-pathfinder path-find path-find-all path-fold pathfinder-reset pathfinder-stat ;; export not yet justified pathfinder-default-test pathfinder-default-matcher pf:exact pf:extensions pf:compound pf:regular-file? pf:directory? pf:any? dirent-exists? dirent-contents dirent-stat dirent-directory dirent-pathname ) (import scheme chicken) (use posix srfi-69 files) (use (only posix-extras stat-regular-file? stat-directory?)) ;; for pf:xxx? tests (use srfi-1) (use data-structures) (use regex) (import irregex) (import (prefix (only data-structures sort) core:)) ;; due to conflicting keywords (import (prefix (only srfi-1 filter) core:)) ;; Notes: symlinks will trick pathfinder into caching duplicate copies, as entries are keyed to full path ;; and directories are not traversed individually to canonicalize the path. Symlinks may also ;; cause lookups to escape the root. ;; Notes: Probably not thread-safe to use same pathfinder object in multiple threads. ;; Notes: Pathnames must be normalized after make-pathname to correct the separator, as make-pathname ;; always uses /. ;; FIXME: Normalize extensions by ensuring they contain start with a dot (and contain only one dot?) ;; TODO: make-pathfinder filter and sort are not yet finalized (and thus not documented). (define-record-type pathfinder (%make-pathfinder paths entries root filter sorter matcher test) pathfinder? (paths %pathfinder-paths) ;; list: ordered search path of absolute pathnames (entries pathfinder-entry-cache pathfinder-entry-cache-set!) ;; ht: "/path/to/dir/" => dirent ht (root %pathfinder-root) ;; str: absolute root pathname (filter pathfinder-filter) ;; (proc paths): path list => filtered path list (called when dirent first read) (sorter pathfinder-sorter) ;; (proc paths): path list => sorted path list (called when dirent first read) (matcher pathfinder-matcher) ;; (test pathfinder-test) ) ;; dirent ht (define-record-type dirent (%make-dirent table list dir) dirent? (table dirent-table) (list dirent-list) (dir dirent-directory)) (define (make-dirent dir basenames) ;; primary constructor (define (alist->dirent a) (alist->hash-table a test: string=?)) (%make-dirent (alist->dirent (map (lambda (x) (cons x #t)) ;; NB: May be able to store stat result here. basenames)) basenames dir)) (define (dirent-ref de basename) ;; Internal use for getting stat vector. (hash-table-ref/default (dirent-table de) basename #f)) (define (dirent-exists? de fn) ;; User visible boolean variant of dirent-ref. (and (dirent-ref de fn) #t)) (define (dirent-set! de fn s) (hash-table-set! (dirent-table de) fn s)) (define dirent-contents dirent-list) (define (dirent-pathname de fn) (string-append (dirent-directory de) fn)) ;; dir guaranteed to end in separator ;; Stat file represented by FN in dirent DE and return stat vector. Stat data is cached at first lookup. ;; Note: if the stat fails, #f is returned and #f is stored in the cache; the entry remains in the dirent ;; but it is subsequently treated as a missing file. (define (dirent-stat de fn) (and-let* ((s (dirent-ref de fn))) (if (vector? s) s (let ((s (handle-exceptions e #f ;; Gosh darn it we need a non-erroring file-stat ;; follow symlink correct? (file-stat (dirent-pathname de fn))))) (dirent-set! de fn s) s)))) ;; entry cache ht (define (make-entry-cache) (make-hash-table string=?)) (define (entry-cache-set! ec path dirent) (hash-table-set! ec path dirent)) (define (entry-cache-ref ec path) (hash-table-ref/default ec path #f)) ;; Fresh copies for user so mutation does not violate cache invariant (define (pathfinder-paths pf) (list-copy (%pathfinder-paths pf))) (define (pathfinder-root pf) (string-copy (%pathfinder-root pf))) ;; Stat full pathname PN (possibly via stat cache). Intended to be used on full pathname AFTER ;; the pathname has been returned from a successful search, e.g. inside a path-fold. This does ;; not do a search itself; in particular, search paths which have not yet been scanned do not ;; have dirents and will return #f. Semantics and rationale for this are iffy. (define (pathfinder-stat pf pn) (define (normalize-dirname d) ;; separator is appended (normalize-pathname (make-pathname d ""))) (and-let* ((de (entry-cache-ref (pathfinder-entry-cache pf) (normalize-dirname (pathname-directory pn))))) (dirent-stat de (pathname-strip-directory pn)))) ;;; odd streams (define-syntax stream-cons (syntax-rules () ((stream-cons s1 s2) (cons s1 (delay s2))))) (define (stream-car s) (car s)) (define (stream-cdr s) (force (cdr s))) (define (stream-append s1 s2) (if (null? s1) (force s2) (stream-cons (stream-car s1) (stream-append (stream-cdr s1) s2)))) (define (stream-filter s pred?) (cond ((null? s) '()) ((pred? (car s)) (stream-cons (car s) (stream-filter (stream-cdr s) pred?))) (else (stream-filter (stream-cdr s) pred?)))) (define (list->stream L) (cond ((null? L) '()) ((null? (cdr L)) L) (else (stream-cons (car L) (list->stream (cdr L)))))) ;;; pathfinder matchers (define (pf:compound exts) (define (make-extension-predicate exts) (let ((rx (irregex `(: bos (+ (or ,@exts)) eos)))) (lambda (base term) (and (substring=? term base 0 0 (string-length term)) (string-search rx base (string-length term)))))) ;; sort 'seq' by expensive operation 'monetize' (via "schwartzian transform") (define (sort-by seq less? monetize) (map car (sort (map (lambda (x) (cons x (monetize x))) seq) (lambda (x y) (less? (cdr x) (cdr y)))))) (define sort-by-compound-extension (let ((+rx:exts+ (irregex "\\.[^.]+")) (priority (lambda (ext exts) (+ 1 (list-index (lambda (x) (string=? ext x)) exts))))) (lambda (exts term files) (sort-by files < (lambda (f) (fold (lambda (x xs) (+ (priority x exts) xs)) 0 (string-split-fields +rx:exts+ f #t (string-length term)))))))) (let ((ext? (make-extension-predicate exts))) (lambda (de term) (define (match-and-sort) (list->stream (map (lambda (fn) (cons de fn)) (sort-by-compound-extension exts term (filter (lambda (x) (ext? x term)) (dirent-contents de)))))) (if (dirent-exists? de term) (stream-cons (cons de term) (match-and-sort)) (match-and-sort))))) (define (pf:exact de fn) (if (dirent-exists? de fn) (cons (cons de fn) '()) '())) (define (pf:extensions exts) (lambda (de fn) ;; Currently assume that exts are prefixed with periods, and we just append them in order. ;; If extension *matches* what is currently on file then we do NOT append it; i.e. "a" or "a.exe" will find ;; "a.exe" if extensions list is '(".exe"). (define (match-ext e) (if (substring=? fn e (- (string-length fn) (string-length e)) 0 (string-length e)) (and (dirent-exists? de fn) fn) ;; Exact match when .ext is already on there. (let ((fe (string-append fn e))) (and (dirent-exists? de fe) fe)))) (let loop ((exts exts)) (cond ((null? exts) '()) ((match-ext (car exts)) => (lambda (fn) (stream-cons (cons de fn) (loop (cdr exts))))) (else (loop (cdr exts))))))) ;;; pathfinder tests (define (pf:regular-file? de fn) (and-let* ((s (dirent-stat de fn))) (stat-regular-file? s))) (define (pf:directory? de fn) (and-let* ((s (dirent-stat de fn))) (stat-directory? s))) (define (pf:any? de fn) #t) ;;; parameters (define pathfinder-default-test (make-parameter pf:regular-file?)) (define pathfinder-default-matcher (make-parameter pf:exact)) ;;; constructors ;; Create pathfinder object with search path PATHS (a list). Search paths may be absolute or relative. ;; - Keys: root: Any relative pathnames in PATHS are taken as relative to ROOT. ;; If ROOT itself is relative, it is taken as relative to the current working directory ;; at the time of initialization of this object. ;; filter: Procedure of 1 argument called with each BASENAME at dirent read time. Note: this should ;; really take the dirent as well, but it hasn't been constructed yet. ;; sort: Sort comparison (i.e. less?) procedure called with each BASENAME at dirent read time (define (make-pathfinder paths #!key (matcher (pathfinder-default-matcher)) (test (pathfinder-default-test)) (root ".") (filter #f) (sort #f)) (define (normalize-root r) (let ((r (normalize-pathname r))) (if (absolute-pathname? r) (string-copy r) (normalize-pathname (make-pathname (current-directory) r))))) (define (normalize-dirname d) ;; appends separator (normalize-pathname (make-pathname d ""))) (let ((root (normalize-root root))) (define (relative-to-root p) (let ((p (normalize-pathname p))) ;; e.g. if p contains a ~ (if (absolute-pathname? p) (normalize-dirname p) (normalize-dirname (make-pathname root p))))) (%make-pathfinder (map relative-to-root paths) (make-entry-cache) root (if filter (lambda (paths) (core:filter filter paths)) identity) (if sort (lambda (paths) (core:sort paths sort)) identity) matcher test))) #| ;; Unused for now. (define (compound-extension-pathfinder paths extensions #!key root filter) (make-pathfinder paths matcher: (pf:compound extensions) root: root filter: filter sort: string search-dirent) ((not (directory? p)) (entry-cache-set! ec p #t) ;; Record non-dirent for this path so we can skip this later. '()) (else (let ((de (make-dirent p (sort-paths (filter-paths (directory p 'dotfiles)))))) (entry-cache-set! ec p de) (search-dirent de))))) (define (make-normalized-dir d b) ;; append separator then normalize (normalize-pathname (let ((p (make-pathname d b))) (if (string=? b "") p (make-pathname p ""))))) (define (subdir? d/ paths) ;; #t if d/ is equal or subdir of any p/ in paths. ;; Assumes d/ and all paths are already normalized and also end in separator char. ;; (Appended separator char avoids false + if p=/foo/bar,d=/foo/bar1.) (any (lambda (p/) (and (>= (string-length d/) (string-length p/)) (substring=? d/ p/))) paths)) (parameterize ((current-pathfinder pf)) ;; May not be useful. (let ((dir (or (pathname-directory pathname) "")) (paths (%pathfinder-paths pf))) (let ((s (if (absolute-pathname? dir) (let ((p (make-normalized-dir (pathfinder-root pf) dir))) (if (subdir? p paths) (find-in-one-path p) '())) (if (null? paths) '() (let loop ((paths paths)) (let ((s (find-in-one-path (make-normalized-dir (car paths) dir)))) (if (null? (cdr paths)) s (stream-append s (delay (loop (cdr paths)))))))) ))) ;; (Filter could be moved external to this procedure.) (if (eq? filt? pf:any?) s ;; Optimize pf:any? filter (stream-filter s (lambda (x) (filt? (car x) (cdr x)))))))))) (define (path-find pf pathname #!optional matcher filt?) (let ((s (path-stream pf pathname matcher filt?))) (and (not (null? s)) (let ((e (car s))) (dirent-pathname (car e) (cdr e)))))) (define (path-find-all pf pathname #!optional matcher filt?) (reverse (path-fold pf cons '() pathname matcher filt?))) (define (path-fold pf func init pathname #!optional matcher filt?) (let loop ((s (path-stream pf pathname matcher filt?)) (xs init)) (if (null? s) xs (let ((e (stream-car s))) (loop (stream-cdr s) (func (dirent-pathname (car e) (cdr e)) xs)))))) ;; Reset cached data for pathfinder PF. (define (pathfinder-reset pf) (pathfinder-entry-cache-set! pf (make-entry-cache)) (void)) )