diff --git a/.gitignore b/.gitignore
index b6e4761..ae8712a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,7 +14,7 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
+#lib/
 lib64/
 parts/
 sdist/
diff --git a/docs/idifhub_conf.xlsx b/docs/idifhub_conf.xlsx
new file mode 100644
index 0000000..f7c2748
Binary files /dev/null and b/docs/idifhub_conf.xlsx differ
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..6352d55
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,8 @@
+name: bastion
+channels:
+  - defaults
+dependencies:
+  - python=3.10
+  - pyyaml
+  - openpyxl
+prefix: /home/parselmouth/.conda/envs/bastion
diff --git a/lab/strawman01.py b/lab/strawman01.py
new file mode 100644
index 0000000..115f7b8
--- /dev/null
+++ b/lab/strawman01.py
@@ -0,0 +1,35 @@
+"""
+lab/strawman01.py
+
+I am a design sketch ("strawman") of the backup loop.
+NOTE(review): I use names that nothing in this patch provides yet
+(HPSS, site.vault, asset.policy, vault.branch, vault.purge), so I
+document the intended flow rather than runnable code.
+"""
+import datetime
+
+import Bastion.Site
+import Bastion.Curator
+import Bastion.Vault
+
+
+site = Bastion.Site.loadSiteConfig()
+vault = HPSS.Vault(site.vault['Purdue-Fortress'])
+today = datetime.datetime.now()
+
+for asset in site.assets:
+    latest = vault[asset].branches.latest
+    if latest.elapsed(today) > asset.policy.longevity:
+        result = vault.branch(asset) #-- create a new branch with a full backup at its base
+    else:
+        result = vault.put(asset, latest) #-- does a differential backup relative to latest.
+
+    #----------------------------------
+    #-- Do I need to do any cleanup?  |
+    #----------------------------------
+    if len(vault[asset].branches) > asset.policy.LOCKS:
+        #-- Sort the backups from most to least recent (youngest to oldest)
+        q = sorted(vault[asset].branches, key = lambda b: b.created, reverse = True)
+        #-- Select the oldest branches for removal.
+        for branch in q[asset.policy.LOCKS:]:
+            vault.purge(branch)
diff --git a/lib/Bastion/Common.py b/lib/Bastion/Common.py
new file mode 100644
index 0000000..666a5b6
--- /dev/null
+++ b/lib/Bastion/Common.py
@@ -0,0 +1,54 @@
+"""
+Bastion.Common
+"""
+import json
+import hashlib
+import base64
+
+import yaml
+
+
+def Slug40(text):
+    """
+    I generate an 8-character slug based on the given text.
+    The slug is generated by hashing the text using SHAKE128,
+    then taking a 40-bit digest and encoding the digest using base32.
+    I return the slug as a str (not bytes) so that it can be dumped to JSON.
+    """
+    h = hashlib.shake_128()
+    h.update(text.encode('utf-8'))
+    bs = h.digest(5)
+    #-- 5 bytes (40 bits) encode to exactly 8 base32 characters, no padding.
+    return base64.b32encode(bs).decode('ascii')
+
+
+
+class Sable:
+    """
+    I am a base type for objects that serialize to and from JSON and YAML.
+    Subclasses implement toJDN and fromJDN; I supply the JSON and YAML forms.
+    """
+    def toJDN(self, **kwargs):
+        raise NotImplementedError
+
+    def toJSON(self, **kwargs):
+        jdn = self.toJDN(**kwargs)
+        return json.dumps(jdn, indent = 3, sort_keys = True)
+
+    def toYAML(self, **kwargs):
+        jdn = self.toJDN(**kwargs)
+        return yaml.dump(jdn, default_flow_style = False, indent = 3)
+
+    @classmethod
+    def fromJDN(cls, jdn, **kwargs):
+        raise NotImplementedError
+
+    @classmethod
+    def fromJSON(cls, js, **kwargs):
+        jdn = json.loads(js)
+        return cls.fromJDN(jdn, **kwargs)
+
+    @classmethod
+    def fromYAML(cls, ydoc, **kwargs):
+        jdn = yaml.safe_load(ydoc)
+        return cls.fromJDN(jdn, **kwargs)
diff --git a/lib/Bastion/Curator.py b/lib/Bastion/Curator.py
new file mode 100644
index 0000000..4732e9c
--- /dev/null
+++ b/lib/Bastion/Curator.py
@@ -0,0 +1,152 @@
+"""
+Bastion.Curator
+
+I provide mostly data structures for working with archives and backups.
+"""
+from .Common import Sable, Slug40
+import hashlib
+import base64
+import datetime
+import pathlib
+
+
+
+class Asset(Sable):
+    """
+    I describe a named dataset, found at some path, that is to be archived.
+    """
+    def __init__(self, name, path, about, **kwargs):
+        self.name = name
+        self.path = pathlib.Path(path)
+        self.about = about
+        self.RDN = None
+
+        for kwarg in ['RDN']:
+            if kwarg in kwargs:
+                setattr(self, kwarg, kwargs[kwarg])
+
+        #-- Without an explicit RDN, derive one from the name.
+        if self.RDN is None:
+            self.RDN = Slug40(self.name)
+
+    def toJDN(self, **kwargs):
+        jdn = {
+            '_type': "Curator.Asset",
+            'name': self.name,
+            'path': str(self.path),
+            'about': self.about,
+            'RDN': self.RDN
+        }
+        return jdn
+
+
+class Archive(Sable):
+    """
+    I represent a top-level archive of some dataset.
+    I am analogous to a git repository in that I may contain
+    multiple branches of object evolution.
+    """
+    def __init__(self, name, **kwargs):
+        self.name = name
+        self.RDN = None
+
+        for kwarg in ['RDN']:
+            if kwarg in kwargs:
+                setattr(self, kwarg, kwargs[kwarg])
+
+        #-- Without an explicit RDN, derive one from the name.
+        if self.RDN is None:
+            self.RDN = Slug40(self.name)
+
+    def toJDN(self, **kwargs):
+        jdn = {
+            '_type': "Curator.Archive",
+            'name': self.name,
+            'RDN': self.RDN
+        }
+        return jdn
+
+
+class Branch(Sable):
+    """
+    I represent a branch (timeline of object evolution) relative to an archive.
+    My snaps are kept sorted by deposit time, oldest first.
+    """
+    def __init__(self, RDN):
+        self.RDN = RDN
+        self.name = RDN
+        self._snaps = [ ]
+
+    @property
+    def head(self):
+        """I am the most recently deposited snap."""
+        return self._snaps[-1]
+
+    @property
+    def base(self):
+        """I am the earliest deposited snap."""
+        return self._snaps[0]
+
+    @property
+    def created(self):
+        return self.base.deposited
+
+    @property
+    def updated(self):
+        return self.head.deposited
+
+    def commit(self, snap):
+        self._snaps.append(snap)
+        self._snaps = sorted(self._snaps, key = lambda s: s.deposited)
+
+    def __iter__(self):
+        return iter(self._snaps)
+
+    @property
+    def age(self, whence = None):
+        whence = whence if (whence is not None) else datetime.datetime.now()
+        return (whence - self.created)
+
+
+
+class BlobRef:
+    def __init__(self, RDN, archive, branch, deposited):
+        self.RDN = RDN
+        self.name = RDN
+        self.archive = archive.RDN if isinstance(archive, Archive) else str(archive)
+        self.branch = branch.RDN if isinstance(branch, Branch) else str(branch)
+        self.deposited = deposited
+
+
+
+class Snap(Sable):
+    """
+    I represent a "snapshot" in time and contain the necessary
+    information to restore a dataset to the state observed when this
+    snap was deposited.
+    """
+    def __init__(self, RDN, archive, branch, deposited, layers, **kwargs):
+        self.RDN = RDN
+        self.name = RDN
+        self.archive = archive.RDN if isinstance(archive, Archive) else str(archive)
+        self.branch = branch.RDN if isinstance(branch, Branch) else str(branch)
+        self.deposited = deposited
+        self.layers = layers
+        self.about = kwargs.get('about', "")
+
+    @property
+    def age(self, whence = None):
+        whence = whence if (whence is not None) else datetime.datetime.now()
+        return (whence - self.deposited)
+
+    def toJDN(self, **kwargs):
+        jdn = {
+            'RDN': self.RDN,
+            'archive': self.archive,
+            'branch': self.branch,
+            'deposited': self.deposited.isoformat(),
+            'layers': self.layers[:]
+        }
+        return jdn
+
+
diff --git a/lib/Bastion/Site.py b/lib/Bastion/Site.py
new file mode 100644
index 0000000..f82bab8
--- /dev/null
+++ b/lib/Bastion/Site.py
@@ -0,0 +1,104 @@
+"""
+Bastion.Site
+"""
+import datetime
+import logging
+import pathlib
+
+import openpyxl
+
+from .Common import Sable, Slug40
+from .Curator import Asset
+
+logger = logging.getLogger(__name__)
+
+
+def loadSiteConfig(path = None):
+    """
+    I load the site configuration workbook and answer a SiteConfig.
+    When no path is given, I use the first of ~/.bastion/site.xlsx
+    and /etc/bastion/site.xlsx that exists.
+    I raise an Exception when no configuration can be found.
+    """
+    src = None
+    if path is not None:
+        src = pathlib.Path(path)
+    else:
+        for p in ['~/.bastion/site.xlsx', '/etc/bastion/site.xlsx']:
+            p = pathlib.Path(p).expanduser()
+            if p.exists():
+                src = p
+                break
+
+    if src is None:
+        raise Exception("Cannot find site.xlsx configuration")
+    else:
+        logger.info("loading site configuration from {}".format(str(src)))
+
+    conf = SiteConfig()
+    conf.loadXLSX(src)
+
+    return conf
+
+
+
+class SiteConfig:
+    def __init__(self):
+        self.site = { } #-- confvar -> confval
+        self._assets = { } #-- @asset -> Asset
+
+    @property
+    def assets(self):
+        return iter(self._assets.values())
+
+    def asset(self, k):
+        return self._assets[k]
+
+    def loadXLSX(self, confpath):
+        wb = openpyxl.load_workbook(str(confpath))
+        #-- Read site conf...
+        self.gatherSiteVars(wb)
+        #-- Read assets...
+        self.gatherAssets(wb)
+
+    def gatherSiteVars(self, wb):
+        ws = wb['site']
+        raise NotImplementedError
+
+    def gatherAssets(self, wb):
+        pass
+
+
+class CurationPolicy(Sable):
+    def __init__(self, name, path, **kwargs):
+        self.name = name
+        self.path = pathlib.Path(path)
+
+        self.RDN = kwargs.get('RDN', Slug40(name))
+        self.asset = self.RDN
+        self.LOCKS = kwargs.get('LOCKS', 2) #-- Lots Of Copies Keep us Safe, minimum # of branches to retain
+        self.longevity = kwargs.get('longevity', datetime.timedelta(days = 30)) #-- maximum time before a new branch is forced
+        self.about = kwargs.get('about', "")
+
+    def toJDN(self, **kwargs):
+        jdn = {
+            '_type': "Bastion.Site.CurationPolicy",
+            'RDN': self.RDN,
+            'name': self.name,
+            'LOCKS': self.LOCKS,
+            'longevity': self.longevity.total_seconds(),
+            'path': str(self.path),
+            'about': self.about
+        }
+        return jdn
+
+    @classmethod
+    def fromJDN(cls, jdn, **kwargs):
+        #-- name and path are given positionally; '_type' is only a type tag.
+        opts = {k: v for (k, v) in jdn.items() if k not in ('_type', 'name', 'path')}
+        #-- toJDN writes longevity as seconds; restore the timedelta.
+        if 'longevity' in opts and not isinstance(opts['longevity'], datetime.timedelta):
+            opts['longevity'] = datetime.timedelta(seconds = opts['longevity'])
+        opts.update(kwargs)
+        return cls(jdn['name'], jdn['path'], **opts)
+
diff --git a/lib/Bastion/Vault.py b/lib/Bastion/Vault.py
new file mode 100644
index 0000000..4a99469
--- /dev/null
+++ b/lib/Bastion/Vault.py
@@ -0,0 +1,31 @@
+"""
+Bastion.Vault
+"""
+class isClerk:
+    """
+    I am an abstract type for "clerk" objects that do data management
+    in the context of a vault.
+    """
+    @property
+    def snaps(self):
+        raise NotImplementedError
+
+    @property
+    def branches(self):
+        raise NotImplementedError
+
+
+
+class isVault:
+    """
+    I am an abstract base type for specialized Vault classes.
+    """
+    def __getitem__(self, asset):
+        raise NotImplementedError
+
+    @property
+    def assets(self):
+        raise NotImplementedError
+
+    def put(self, asset, latest = None):
+        pass
diff --git a/lib/Bastion/__init__.py b/lib/Bastion/__init__.py
new file mode 100644
index 0000000..e69de29