diff --git a/README.md b/README.md index 9fbd8c5..8879b94 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,27 @@ # bastion Scripts that integrate RCAC Fortress, MS Teams, etc. + +## Concepts +Data assets can have concurrent presence in three different namespaces: + 1. the host file system + 2. the logical zone (kind of like a "location" when configuring a web server) + 3. the vault + +Backup operations are semantically done in the logical and vault file systems. +This allows, for example, data assets to be moved from one host to another, or possibly replicated onto several hosts. + +### Host File System +Data assets in the host file system are described as fully qualified (i.e. from the root of the file system) POSIX paths. + + +### Logical File System (Resource Zone) +Data assets also belong to a logical "resource" zone. +In a zone, assets are fully described via an Asset Resource Key (ARK), which is a triple of (site.name, zone.name, asset.name). + + +### Vault Namespace +Data assets can be fully copied or differentially updated in a vault. +These objects are described using a BLONDE (BLOb Name and Description Encoding), which is a compact, unique name for a backup object that encodes a reference to the source ARK, its lineage, and time of commit. +In the vault, a BLONDE can either be an anchor (full backup) or a differential. +References to assets are done by "badge" - which is a 40-bit chunk from a SHAKE128 hash of the asset's ARK (in CURIE format). 
+Time is also highly compressed using "Quantim" diff --git a/bin/bastion.py b/bin/bastion.py index 2c0d060..2d54aae 100755 --- a/bin/bastion.py +++ b/bin/bastion.py @@ -158,8 +158,11 @@ def do_help(self, comargs, comdex): #-- backup operations | #---------------------- def do_backup_asset(self, comargs, comdex): + """ + backup asset {ARK} + """ ark = ARK(comdex[2]) - print(ark) +# print(ark) site = self.site(ark.site) vault = self.vault('fortress') vault.provision(ark) @@ -205,8 +208,14 @@ def do_export_asset_manifest(self, comargs, comdex): if __name__ == '__main__': app = App().configured() - app.run( ) - + #app.run( ) + comdex = dict(enumerate(sys.argv[1:])) + fortress = app.vault('fortress') + rusina = app.site('rusina') + ark = ARK(comdex[2]) + ds = rusina.asset(ark) + fortress.provision(ark) + #bastion site {site} backup #bastion zone {zone} backup diff --git a/lib/Bastion/Actions.py b/lib/Bastion/Actions.py index de48fe5..34abce6 100644 --- a/lib/Bastion/Actions.py +++ b/lib/Bastion/Actions.py @@ -3,3 +3,5 @@ I am the functions that execute the various actions of the bastion app. """ +def doBackupAsset(site, vault, ark): + raise NotImplementedError diff --git a/lib/Bastion/Common.py b/lib/Bastion/Common.py index e7d2bb5..76f95da 100644 --- a/lib/Bastion/Common.py +++ b/lib/Bastion/Common.py @@ -35,7 +35,7 @@ def RDN(x): """ Answers the relatively distinguishing name (RDN) of object, x. If x is a string, it is assumed that x is the name. - If x is an object with a CN attribute, answers x.RDN + If x is an object with an RDN attribute, answers x.RDN """ if isinstance(x, str): return x @@ -135,6 +135,9 @@ def __new__(cls, *args, **kwargs): @property def path(self): + """ + A Pure POSIX path object for the path relative to my zone. + """ return pathlib.PurePosixPath(self[1]) def __str__(self): @@ -148,6 +151,11 @@ def __repr__(self): @property def ref(self): + """ + The qualified name of this object within my space. + If my name isn't qualified (i.e. 
has no extra "arguments"), then this is the string representation of my path; + however, if my name is qualified, then I include the arguments after the typical URL "?" separator. + """ if self.args: return "{}?{}".format(str(self.path), self.args) else: diff --git a/lib/Bastion/Curator.py b/lib/Bastion/Curator.py index e373618..22eb5d5 100644 --- a/lib/Bastion/Curator.py +++ b/lib/Bastion/Curator.py @@ -9,11 +9,11 @@ from .Model import ARK from .Chronology import Quantim -class BLOND(canTextify): + +class BLONDE(canTextify): """ - BLOb Name and Description - I am a structured name describing a single ark. - An ark is typically a BLOB or file, hence I am usually BLOB name (e.g. in an S3 bucket) or a file name. + BLOb Name and Description Encoding + I am a structured name describing a point in time for a single ARK. """ def __init__(self, asset, detail, basis = None, when = None): if isinstance(asset, ARK): @@ -57,25 +57,25 @@ def __str__(self): return self.encode(self.badge, self.when, self.detail, self.basis) @classmethod - def decode(cls, blond): + def decode(cls, blonde): ds = Thing(**{ - 'badge': blond[0:8], - 'when': Quantim(blond[8:16]), - 'detail': blond[16], - 'basis': blond[17:20], + 'badge': blonde[0:8], + 'when': Quantim(blonde[8:16]), + 'detail': blonde[16], + 'basis': blonde[17:20], }) return cls(ds.badge, ds.when, ds.detail, ds.basis) @classmethod - def fullBackup(cls, ark, **kwargs): + def forFullBackup(cls, ark, **kwargs): when = kwargs.get('when', datetime.datetime.utcnow()) basis = kwargs.get('basis', Boggle(3)) return cls(ark, 'F', basis, when) @classmethod - def diffBackup(cls, anchor, **kwargs): + def forDiffBackup(cls, anchor, **kwargs): """ - Given a full backup reference (BLOND), I generate a new BLOND for a differential backup. + Given a full backup reference (BLONDE), I generate a new BLONDE for a differential backup. 
""" when = kwargs.get('when', datetime.datetime.utcnow()) return cls(anchor.badge, 'D', anchor.basis, when) @@ -136,12 +136,12 @@ class Manifest: def __init__(self, asset, items): self.asset = ARK(asset) self.badge = self.asset.badge - self._items = None #-- a sorted tuple of BLONDs + self._items = None #-- a sorted tuple of BLONDEs self._snaps = None self._anchors = None #-- a map of anchor layer -> blob name - blonds = [BLOND(item) for item in items] - self._items = tuple(sorted([blond for blond in blonds if (blond.badge == self.badge)], key = lambda b: b.RDN)) + blondes = [BLONDE(item) for item in items] + self._items = tuple(sorted([blonde for blonde in blondes if (blonde.badge == self.badge)], key = lambda b: b.RDN)) self._anchors = dict([(item.layer, item) for item in self._items if item.isAnchor]) def __iter__(self): @@ -179,21 +179,21 @@ def thread(self, ankle): anchor = ankle elif isinstance(ankle, snap): anchor = ankle.anchor.basis - elif isinstance(ankle, BLOND): + elif isinstance(ankle, BLONDE): anchor = ankle.basis return Thread([snap for snap in self.snaps if snap.basis == anchor]) def anchor(self, item): """ - I answer the blond to the anchor for the given item. - .anchor(item:BLOND) + I answer the BLONDE to the anchor for the given item. + .anchor(item:BLONDE) .anchor(item:str) .anchor(item:Snap) """ - if isinstance(item, BLOND): + if isinstance(item, BLONDE): return self._anchors[item.anchor] if isinstance(item, str): - return self.anchor( BLOND.decode(item) ) + return self.anchor( BLONDE.decode(item) ) if isinstance(item, Snap): return item.anchor raise ValueError @@ -201,7 +201,7 @@ def anchor(self, item): @property def anchors(self): """ - I answer a chronologically (earliest -> latest) tuple of BLONDs for my anchor items. + I answer a chronologically (earliest -> latest) tuple of BLONDEs for my anchor items. 
""" return tuple(sorted(self._anchors.values, key = lambda item: item.RDN)) @@ -209,7 +209,7 @@ def anchors(self): class Snap: """ - A snap is a pair of blonds (differential, anchor) + A snap is a pair of BLONDEs (differential, anchor) In the case that the snap is an anchor then (anchor, anchor) """ def __init__(self, head, anchor): diff --git a/lib/Bastion/HPSS.py b/lib/Bastion/HPSS.py index 9aadd4a..906a5e9 100644 --- a/lib/Bastion/HPSS.py +++ b/lib/Bastion/HPSS.py @@ -8,7 +8,7 @@ from Bastion.Common import Thing, Unknown import Bastion.Model -from Bastion.Curator import BLOND +from Bastion.Curator import BLONDE @@ -382,34 +382,47 @@ def manifest(self, *args): manifest(site, zone, asset) """ if len(args) == 1: - ark = args[0] + return self._manifest_ark( args[0] ) elif len(args) == 3: - site, zone, asset = args - ark = ARK(site, zone, asset) + return self._manifest_site_zone_asset( args[0], args[1], args[2] ) else: raise ValueError + + def _manifest_ark(self, ark): #-- The contents of {root}/{site}/{zone}/{asset} are backup blobs #-- Each name in this folder is a "BLOND" (BLOb Name and Descriptor) #-- The manifest a catalog of all of the backup objects for the asset. fls = self.hsi.ls(self.root / ark.site / ark.zone / ark.asset) - blonds = [fl.path.name for fl in fls] - manifest = Bastion.Curator.Manifest(ark, blonds) + blondes = [fl.path.name for fl in fls] + manifest = Bastion.Curator.Manifest(ark, blondes) return manifest + def _manifest_site_zone_asset(self, site, zone, asset): + return self._manifest_ark( ARK(site, zone, asset) ) + + def provision(self, *args): + """ + provision(ark) - ensures that the site, zone, and asset folders exist. + provision(site, zone, asset_name) - an alias for provision(ark) + provision(asset) - given an instance of Asset, provision the necessary site, zone, and asset folders. 
+ """ if len(args) == 1: - ark = args[0] - self.hsi.mkdirs(self.root / ark.site / ark.zone / ark.asset) + return self._provision_ark( args[0] ) elif len(args) == 3: - site, zone, asset = args - ark = ARK(site, zone, asset) - self.provision(ark) + return self._provision_site_zone_asset( args[0], args[1], args[2] ) else: raise ValueError - def htar(self, asset, **kwargs): + def _provision_ark(self, ark): + self.hsi.mkdirs(self.root / ark.site / ark.zone / ark.asset) + + def _provision_site_zone_asset(self, site, zone, asset_name): + return self._provision_ark( ARK(site, zone, asset_name) ) + + def push(self, asset, **kwargs): detail = kwargs.get('detail', 'F') localf = asset.path ark = asset.ARK @@ -419,11 +432,11 @@ def htar(self, asset, **kwargs): 'site': ark.site, 'zone': ark.zone, 'asset': ark.asset, - 'blond': BLOND(ark, detail), + 'blonde': BLONDE(ark, detail), 'localf': localf } - comargs = [opts['htar'], "-Hverify=1", "-cfvf", "{site}/{zone}/{asset}/{blond}".format(**opts), "{localf}".format(**opts)] + comargs = [str(opts['htar']), "-Hverify=1", "-c", "-v", "-f", "{site}/{zone}/{asset}/{blonde}.tar".format(**opts), "{localf}".format(**opts)] proc = subprocess.run(comargs, capture_output = True, check = True) stdout = proc.stdout.decode('utf-8') stderr = proc.stderr.decode('utf-8') diff --git a/lib/Bastion/Model.py b/lib/Bastion/Model.py index c8ac9b0..a35dcc2 100644 --- a/lib/Bastion/Model.py +++ b/lib/Bastion/Model.py @@ -6,7 +6,6 @@ from .Common import RDN, CURIE, Slug40 - class ARK(tuple): """ Asset Resource Key (not to be confused with the librarian's definition of ARK!) @@ -57,6 +56,9 @@ def asset(self): @property def zolo(self): + """ + zolo ("zone location") is the path to this including its zone. 
+ """ return pathlib.PurePosixPath(self.zone) / self.asset @property @@ -100,8 +102,105 @@ def assets(self, zone): """ raise NotImplementedError - def manifest(self, asset): + def manifest(self, ark): + """ + Given an ARK, I answer the manifest of held objects (aka BLONDEs). + """ + raise NotImplementedError + + def push(self, asset, **kwargs): """ - Given an asset, I answer the manifest of held objects. + Given an asset, I push a backup of the asset to this vault. + push(asset, detail = 'FULL') + push(asset, detail = 'DIFF') """ raise NotImplementedError + + def pull(self, blonde, **kwargs): + raise NotImplementedError + + def upload(self, path, ark): + raise NotImplementedError + + def download(self, ark, time, lpath): + raise NotImplementedError + + def configured(self, conf): + raise NotImplementedError + + + + +class isAsset: + """ + abstract Asset type, describes a local file in both the host file system and the logical (zone) space. + """ + def __init__(self, zone, name): + self.zone = zone + self.name = str(name) + self.about = None + self._RDN = None + + @property + def RDN(self): + """ + I am the relatively distinguishing name for this object. + In the case of assets, the RDN defaults to the "badge" of the asset. + In some cases, the operator may want to manually label an asset with an RDN. + Thus, an RDN can be given explicity as kwarg in object construction. + If no RDN is explicitly given, the asset's badge is used. + """ + if self._RDN is None: + self._RDN = self.badge + return self._RDN + + @property + def path(self): + """ + I answer the local (host) file system path to this asset. 
+ """ + return self.zone.root / pathlib.Path(self.name) + + @property + def CURIE(self): + """ + My CURIE (Compact URI) form is [{site}:{zone}/{asset}] + """ + return self.ARK.CURIE + + @property + def ARK(self): + return ARK(self.zone.site.name, RDN(self.zone), self.name) + + @property + def badge(self): + """ + I am a compact (40-bit) hash constructed from my CURIE (compact URI) + """ + return self.ARK.badge + + @property + def zolo(self): + return self.ARK.zolo + + +class isZone: + """ + abstract resource zone. + """ + ASSET_CLS = isAsset + def __init__(self, site, name, root): + self.site = site + self.name = name + self.root = root if (root is None) else pathlib.Path(root) + + @property + def RDN(self): + """ + I am the relatively distinguishing name for this object. + """ + return self.name + + def __div__(self, name): + return self.ASSET_CLS(self, name) + diff --git a/lib/Bastion/Site.py b/lib/Bastion/Site.py index 913b9d6..f5af769 100644 --- a/lib/Bastion/Site.py +++ b/lib/Bastion/Site.py @@ -7,7 +7,7 @@ from .Common import * from .Condo import CxNode -from .Model import ARK +from .Model import ARK, isAsset, isZone #from .Curator import Asset logger = logging.getLogger(__name__) @@ -147,12 +147,14 @@ def resources(self, zone): def asset(self, ark): """ - Will search through all zones to locate the asset identified by the given search argument: q. - .asset(q:ARK) + Will search through all zones to locate the asset identified by the given ARK. + .asset(ark:str) #-- will interpret the given string as a CURIE like [@site:zone/path] + .asset(ark:ARK) """ - if (ark.site == self.name): - zone = self.zone(ark.zone) - return zone.assets.named(str(ark.asset)) + q = ARK(ark) #-- assure that the query is an instance of ARK + if (q.site == self.name): + zone = self.zone(q.zone) + return zone.assets.named(str(q.asset)) return None # zoned = None @@ -221,54 +223,17 @@ def fromJDN(cls, jdn): -class Zone(canConfigure): - """ - I am a (resource) zone. 
- A zone is a logical entry point to a collection of assets. - A zone allows for asset collections to be mounted at different paths on different sites, while retaining the same internal hierarchical structure. - For a given site, no two zones should have the same name (i.e. the name is relatively distinguishing with respect to the site) - """ - def __init__(self, site, name, root = None): - self.site = Site(site) - self.name = name - self.root = root if (root is None) else pathlib.Path(root) - self.policy = self.site.policy #--inherit my site's default policy - - @property - def RDN(self): - """ - I am the relatively distinguishing name for this object. - """ - return self.name - - @property - def assets(self): - return self.site.assets(self.name) - - def configured(self, conf): - if conf: - if entity(conf).isString: - #-- short form - self.root = pathlib.Path(conf) - else: - #-- long form - self.root = pathlib.Path(conf['root']) - if 'policy' in conf: - self.policy = RetentionPolicy(conf['policy']) - return self - - def __div__(self, name): - return Asset(self, name) - - - -class Asset(canTextify): +class Asset(isAsset, canTextify): def __init__(self, zone, *args, **kwargs): """ - Asset(zone, name) + Asset(zone:Zone, name:str) + Asset(zone:Zone, path:pathlib.Path) Asset(zone, conf:CxNode) Asset(zone, conf:dict) """ + # WORK BELOW WORK BELOW WORK BELOW + #isAsset.__init__(self, zone, name) + self.zone = zone self.name = None self.about = None @@ -277,6 +242,8 @@ def __init__(self, zone, *args, **kwargs): spec = args[0] if isinstance(spec, str): self.name = spec + elif isinstance(spec, pathlib.Path): + self.name = str(spec) elif isinstance(spec, CxNode): self.configured(spec) elif isinstance(spec, dict): @@ -299,18 +266,6 @@ def __init__(self, zone, *args, **kwargs): def __str__(self): return "{} ({})".format(self.badge, self.RDN) - @property - def RDN(self): - """ - I am the relatively distinguishing name for this object. 
- In the case of assets, the RDN defaults to the "badge" of the asset. - In some cases, the operator may want to manually label an asset with an RDN. - Thus, an RDN can be given explicity as kwarg in object construction. - If no RDN is explicitly given, the asset's badge is used. - """ - if self._RDN is None: - self._RDN = self.badge - return self._RDN def _get_policy(self): return getattr(self, '_policy', self.zone.policy) @@ -323,38 +278,9 @@ def _set_policy(self, p): policy = property(_get_policy, _set_policy) - @property - def path(self): - """ - I answer the local file system path to this asset. - """ - return self.zone.root / pathlib.Path(self.name) - - @property - def CURIE(self): - """ - My CURIE (Compact URI) form is [{site}:{zone}/{asset}] - """ - return self.ARK.CURIE - - @property - def ARK(self): - return ARK(self.zone.site.name, self.zone.name, self.name) - - @property - def badge(self): - """ - I am a compact (40-bit) hash constructed from my CURIE (compact URI) - """ - return self.ARK.badge - - @property - def zolo(self): - return self.ARK.zolo - def toJDN(self, **kwargs): jdn = { - 'zone': "@{}:{}".format(self.zone.site.name, self.zone.name), + 'zone': "@{}:{}".format(self.zone.site.name, RDN(self.zone)), 'path': str(self.name), } if self.about: @@ -456,3 +382,39 @@ def __len__(self): def __iter__(self): return iter(sorted(self.xRDNs.values(), key = lambda x: str(x.path))) + + +class Zone(isZone, canConfigure): + """ + I am a (resource) zone. + A zone is a logical entry point to a collection of assets. + A zone allows for asset collections to be mounted at different paths on different sites, while retaining the same internal hierarchical structure. + For a given site, no two zones should have the same name (i.e. 
the name is relatively distinguishing with respect to the site) + """ + + ASSET_CLS = Asset + + def __init__(self, site, name, root = None): + isZone.__init__(self, site, name, root) + self.site = Site(site) + self.policy = self.site.policy #--inherit my site's default policy + + @property + def assets(self): + return self.site.assets(self.name) + + def configured(self, conf): + if conf: + if entity(conf).isString: + #-- short form + self.root = pathlib.Path(conf) + else: + #-- long form + self.root = pathlib.Path(conf['root']) + if 'policy' in conf: + self.policy = RetentionPolicy(conf['policy']) + return self + + def __div__(self, name): + return Asset(self, name) +