@startuml
title Ugarit component model\n(sample two-node distributed installation with local cache)

node "Backed Up Host" {
  frame "bin/ugarit" {
    [ugarit snapshot] --> vault
    [ugarit archive] --> vault
    [ugarit explore] --> vault
    vault - [ugarit vault] : library API
    note left of vault : High-level object storage interface
    note right of [ugarit vault]
      Compression, encryption, hashing, handling of
      common data structures such as directories, and
      splitting large data into blocks occurs in here.
    end note
  }

  database {
    [ugarit vault] --> [file mod-time cache] : sqlite
    [ugarit vault] --> [archive metadata cache] : sqlite
  }

  note right of [file mod-time cache]
    Storing the modification times of files stored in the
    vault means later snapshots can note that a file has
    not changed, and avoid having to hash the file and then
    ask the vault if data with that hash already exists.
  end note

  note right of [archive metadata cache]
    Caching the metadata used to find files in
    the archive allows for rapid searching.
  end note

  () "storage" as s1

  note left of s1
    Low-level storage of opaque encrypted blocks
  end note

  frame "bin/backend-cache" {
    [ugarit vault] --> s1
    s1 - [backend-cache] : UNIX pipe
    note right of [backend-cache]
      Caching the existence of blocks locally improves
      performance when the backend is on another node,
      as asking the backend if a block is already present
      is a very common operation.
    end note
  }

  database {
    [backend-cache] --> [local block cache] : sqlite
  }

  () "storage" as s2
  [backend-cache] --> s2

  frame "bin/backend-cluster" {
    s2 - [backend-cluster] : UNIX pipe
    note right of [backend-cluster]
      Storing blocks onto multiple storage devices provides
      resilience against failure, and spreading blocks across
      a pool of storage devices provides an easy way to
      increase capacity by adding more.
    end note
  }

  database {
    [backend-cluster] --> [cluster state cache] : sqlite
  }

  note right of [cluster state cache]
    backend-cluster needs to remember which storage shards
    a block was written to, so we can quickly find the block
    again when it's needed. Also, having a master index of
    the blocks and their replication status is necessary to
    automatically re-locate blocks when removing a storage
    shard, or when a shard has been permanently lost, in
    order to maintain the invariant that every block is
    sufficiently replicated.
  end note
}

cloud "Network of storage servers" {
  node "Storage server 1" {
    () "storage" as ssh1
    frame "bin/backend-fs" as fs1 {
      ssh1 - [backend-fs splitlog 1]
    }
    database {
      [physical storage] as [db1]
      [backend-fs splitlog 1] --> [db1]
    }
  }

  node "Storage server 2" {
    () "storage" as ssh2
    frame "bin/backend-fs" as fs2 {
      ssh2 - [backend-fs splitlog 2]
    }
    database {
      [physical storage] as [db2]
      [backend-fs splitlog 2] --> [db2]
    }
  }

  [backend-cluster] -> ssh1 : UNIX pipe over ssh
  [backend-cluster] -> ssh2 : UNIX pipe over ssh
}
@enduml
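
The backend-cache note in the diagram explains that checking whether a block already exists in the backend is a very common operation, so caching positive answers in a local sqlite database avoids a network round trip per check. The sketch below is purely illustrative of that idea, not Ugarit's actual code (Ugarit itself is written in Chicken Scheme, and its real backend protocol runs over a UNIX pipe); the `BlockExistenceCache` class and the `remote_has_block` callable are hypothetical names introduced here.

```python
import sqlite3

class BlockExistenceCache:
    """Illustrative sketch of the backend-cache idea: remember locally
    which block hashes the remote backend is known to hold, so that
    repeated "is this block present?" queries never cross the network."""

    def __init__(self, cache_path, remote_has_block):
        # remote_has_block stands in for the round trip to the real
        # backend over the UNIX pipe / ssh connection.
        self.remote_has_block = remote_has_block
        self.db = sqlite3.connect(cache_path)
        self.db.execute(
            "CREATE TABLE IF NOT EXISTS known_blocks (hash TEXT PRIMARY KEY)")

    def exists(self, block_hash):
        row = self.db.execute(
            "SELECT 1 FROM known_blocks WHERE hash = ?",
            (block_hash,)).fetchone()
        if row:
            return True  # cache hit: no network round trip needed
        if self.remote_has_block(block_hash):
            # Cache positive answers only: a block, once stored, stays
            # stored, whereas a "missing" answer becomes stale as soon
            # as the block is uploaded.
            self.db.execute(
                "INSERT OR IGNORE INTO known_blocks VALUES (?)",
                (block_hash,))
            self.db.commit()
            return True
        return False

# Example: an in-memory set stands in for the remote storage cluster.
remote = {"abc123"}
cache = BlockExistenceCache(":memory:", remote.__contains__)
assert cache.exists("abc123")      # first check asks the "remote", then caches
assert cache.exists("abc123")      # second check is answered locally
assert not cache.exists("def456")  # misses are not cached
```

Only the existence of blocks is cached, not their contents; the local block cache stays small while still eliminating the most frequent backend query.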