From b268c51b3d7e8575dce61f29c8ea2e6134e4bc09 Mon Sep 17 00:00:00 2001 From: Nathan Denny Date: Tue, 30 Apr 2024 13:49:47 -0400 Subject: [PATCH] roll up --- bin/bastion.py | 233 +++++++++++++++++++++++++++++++----- bin/test.py | 108 +++++++++++++++++ etc/conf-idifhub.yaml | 1 + lib/Bastion/Actions.py | 5 + lib/Bastion/Chronology.py | 82 +++++++++++++ lib/Bastion/Common.py | 140 +++++++++++----------- lib/Bastion/Curator.py | 241 ++++++++++++++++++++++++++++++++------ lib/Bastion/HPSS.py | 141 ++++++++++++++++++++-- lib/Bastion/Model.py | 107 +++++++++++++++++ lib/Bastion/Site.py | 142 ++++++++++++++-------- 10 files changed, 999 insertions(+), 201 deletions(-) create mode 100755 bin/test.py create mode 100644 lib/Bastion/Actions.py create mode 100644 lib/Bastion/Chronology.py create mode 100644 lib/Bastion/Model.py diff --git a/bin/bastion.py b/bin/bastion.py index 7de70e4..2c0d060 100755 --- a/bin/bastion.py +++ b/bin/bastion.py @@ -4,6 +4,9 @@ import os import pathlib import logging +import json + +import yaml logger = logging.getLogger() logging.basicConfig(level = logging.DEBUG) @@ -17,41 +20,215 @@ sys.path.insert(0, str(LIB_PATH)) -from Bastion.Common import * -from Bastion.Site import Site -from Bastion.Condo import * - +from Bastion.Common import * +from Bastion.Site import Site +from Bastion.Condo import * +from Bastion.Actions import * +from Bastion.Model import ARK +import Bastion.HPSS + +""" +zone backup procedure... +1. read conf file(s) +2. connect to the storage vault +3. retrieve manifest for given zone +4. get most recent anchor snap +5. compute drift between current datetime and most recent anchor snap +6. perform backup + a. differential if drift < policy + b. anchor (full) if drift >= policy +""" + +def SUCCESS(obj): + return { + 'reply': { + 'status': '200', + 'message': 'Ok' + }, + 'body': obj + } + + +def FAILED(obj): + return { + 'reply': { + 'status': '400', + 'message': 'Bad Request' + }, + 'body': obj + } + + +def CRASHED(obj): + return { + 'reply': { + 'status': '500', + 'message': 'Internal Application Error (crash)' + }, + 'body': obj + } + + + + +class App: + CONF_SEARCH_ORDER = [ + pathlib.Path('/etc/bastion'), + APP_PATH / 'etc', + pathlib.Path('~/.bastion').expanduser() + ] + + def info(self, msg): + logger.info(msg) + + def debug(self, msg): + logger.debug(msg) + + def warn(self, msg): + logger.warn(msg) + + def error(self, msg): + logger.error(msg) + + def critical(self, msg): + logger.critical(msg) + + def __init__(self): + self.conf = Condex() + + def configured(self): + for folder in App.CONF_SEARCH_ORDER: + folder = folder.expanduser() + for confile in folder.rglob("conf-*.yaml"): + self.info("reading conf from {}".format(str(folder / confile))) + self.conf.load(folder / confile) + return self + + def site(self, name): + return Site(name).configured(self.conf) + + def vault(self, name): + if ((name[0] == '{') and (name[-1] == '}')): + name = name[1:-1] + if name in self.conf['vaults']: + protocol = self.conf['vaults'][name]['protocol'] + if protocol == 'HPSS': + return Bastion.HPSS.Vault(name).configured(self.conf) + else: + raise NotImplementedError + else: + return None + + def run(self): + comargs = sys.argv[1:] + comdex = dict(enumerate(sys.argv[1:])) + #verb = comargs.get(0, 'help') + + menu = [ + ("help", self.do_help), + ("export zone assets", self.do_export_zone_assets), + ("export site list", self.do_export_site_list), + ("export asset manifest", self.do_export_asset_manifest), + ("backup asset", self.do_backup_asset) + ] + + action = self.do_help #-- default is to show the help. + for request, method in menu: + tokens = request.split() + if tokens == comargs[:len(tokens)]: + action = method + try: + answer = action(comargs, comdex) + except Exception as err: + answer = CRASHED(str(err)) + + sys.stdout.write( yaml.dump(answer, default_flow_style = False) ) + if answer['reply']['status'][0] in ('1','2','3'): + sys.exit(0) + else: + sys.exit(1) + + #---------------------- + #-- basic operations | + #---------------------- + def do_help(self, comargs, comdex): + raise NotImplementedError + + #---------------------- + #-- backup operations | + #---------------------- + def do_backup_asset(self, comargs, comdex): + ark = ARK(comdex[2]) + print(ark) + site = self.site(ark.site) + vault = self.vault('fortress') + vault.provision(ark) + asset = site.asset(ark) + flag, stdout, stderr = vault.htar(asset) + if flag: + return SUCCESS({'stdout': stdout, 'stderr': stderr}) + else: + return FAILED({'stdout': stdout, 'stderr': stderr}) + + #---------------------- + #-- export operations | + #---------------------- + def do_export_site_list(self, comargs, comdex): + name = comdex[3] + vault = self.vault(name) + sitels = list(vault.sites) + report = { + 'vault': name, + 'sites': list(vault.sites) + } + return SUCCESS(report) + + def do_export_zone_assets(self, comargs, comdex): + spec = comdex[3] + vname = comdex[4] + ark = ARK("[{}]".format(spec)) + vault = self.vault(vname) + zone = ark.zone + assets = vault.assets(ark.site, ark.zone) + report = { + 'vault': vname, + 'site': ark.site, + 'zone': ark.zone, + 'assets': list(assets) + } + return SUCCESS(report) + + def do_export_asset_manifest(self, comargs, comdex): + raise NotImplementedError -CONF_SEARCH_ORDER = [ - pathlib.Path('/etc/bastion'), - APP_PATH / 'etc', - pathlib.Path('~/.bastion').expanduser() -] if __name__ == '__main__': - comargs = dict(enumerate(sys.argv[1:])) - subject = comargs.get(0, 'help') - request = comargs.get(1, 'help') - - conf = Condex() - for folder in CONF_SEARCH_ORDER: - for confile in folder.rglob("conf-*.yaml"): - print(confile) - conf.load(folder / confile) - - if subject == 'keytab': - if request == 'refresh': - raise NotImplementedError - else: - Keytab(conf).help() - - if subject == 'backups': - sname = comargs[2] - site = Site(sname).configured(conf) + app = App().configured() + app.run( ) +#bastion site {site} backup +#bastion zone {zone} backup +#bastion manifest {zone} #bastion backups tidy idifhub #bastion backups update idifhub #bastion backups catalog idifhub #bastion keytab refresh fortress +#bastion zone { } restore +#bastion asset { } restore +#bastion asset { } backup +#bastion zone { } backup + +#bastion restore asset { } +#bastion restore zone { } +#bastion backup asset { } +#bastion backup zone { } +#bastion export zone assets { } +#bastion export asset manifest { } +#bastion export site list + +#bastion export asset manifest @idifhub:LiDAR/QL2023_IN_18 {fortress} +#bastion export site list {fortress} +#bastion export zone assets @idifhub:LiDAR {fortress} +#bastion backup asset [@idifhub:LiDAR/QL2023_IN_18] diff --git a/bin/test.py b/bin/test.py new file mode 100755 index 0000000..a2d8616 --- /dev/null +++ b/bin/test.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 + +import sys +import os +import pathlib +import logging + +logger = logging.getLogger() +logging.basicConfig(level = logging.DEBUG) + + +BIN_PATH = pathlib.Path(sys.argv[0]).absolute().parent +APP_PATH = BIN_PATH.parent +LIB_PATH = APP_PATH / 'lib' +LBX_PATH = APP_PATH / 'lib-exec' + +sys.path.insert(0, str(LIB_PATH)) + +from Bastion.Common import * +from Bastion.Site import Site +from Bastion.Condo import Condex +import Bastion.HPSS +from Bastion.Curator import BLOND + +""" +zone backup procedure... +1. read conf file(s) +2. connect to the storage vault +3. retrieve manifest for given zone +4. get most recent anchor snap +5. compute drift between current datetime and most recent anchor snap +6. perform backup + a. differential if drift < policy + b. anchor (full) if drift >= policy +""" + + +class App: + CONF_SEARCH_ORDER = [ + pathlib.Path('/etc/bastion'), + APP_PATH / 'etc', + pathlib.Path('~/.bastion').expanduser() + ] + + def info(self, msg): + logger.info(msg) + + def debug(self, msg): + logger.debug(msg) + + def warn(self, msg): + logger.warn(msg) + + def error(self, msg): + logger.error(msg) + + def critical(self, msg): + logger.critical(msg) + + def __init__(self): + self.conf = Condex() + + def configured(self): + for folder in App.CONF_SEARCH_ORDER: + folder = folder.expanduser() + for confile in folder.rglob("conf-*.yaml"): + self.info("reading conf from {}".format(str(folder / confile))) + self.conf.load(folder / confile) + return self + + + + + + + +if __name__ == '__main__': + app = App().configured() + #app.run( ) + + vault = Bastion.HPSS.Vault('fortress').configured(app.conf) + site = Site('idifhub').configured(app.conf) + + RzLiDAR = site.zones[0] + assets = site.assets(RzLiDAR) + asset = assets[0] + + + +#bastion site {site} backup +#bastion zone {zone} backup +#bastion manifest {zone} +#bastion backups tidy idifhub +#bastion backups update idifhub +#bastion backups catalog idifhub +#bastion keytab refresh fortress +#bastion zone { } restore +#bastion asset { } restore +#bastion asset { } backup +#bastion zone { } backup + +#bastion restore asset { } +#bastion restore zone { } +#bastion backup asset { } +#bastion backup zone { } +#bastion export zone manifest { } +#bastion export asset manifest { } +#bastion export site list diff --git a/etc/conf-idifhub.yaml b/etc/conf-idifhub.yaml index c98873d..568ba83 100644 --- a/etc/conf-idifhub.yaml +++ b/etc/conf-idifhub.yaml @@ -54,6 +54,7 @@ vaults: protocol: HPSS host: fortress.rcac.purdue.edu login: ndenny + root: /home/ndenny key: path: /home/ndenny/.private/hpss.unix.keytab refresh: diff --git a/lib/Bastion/Actions.py b/lib/Bastion/Actions.py new file mode 100644 index 0000000..de48fe5 --- /dev/null +++ b/lib/Bastion/Actions.py @@ -0,0 +1,5 @@ +""" +Bastion.Actions + +I am the functions that execute the various actions of the bastion app. +""" diff --git a/lib/Bastion/Chronology.py b/lib/Bastion/Chronology.py new file mode 100644 index 0000000..bac9cd2 --- /dev/null +++ b/lib/Bastion/Chronology.py @@ -0,0 +1,82 @@ +""" +Bastion.Chronology +""" +import string +import datetime +import logging + +logger = logging.getLogger(__name__) + + +class Quantim: + """ + Quantized Time (Quantim). + I represent with ~ 1 minute of precision a date within the 3rd millenium. + (i.e. 2000 - 2999) + The quantized minute is described in base36 (0...9,A...Z). + With two digits, the day is divided into 1,296 quantums, each of which is ~ 66.67 seconds. + """ + + EN36 = list(string.digits + string.ascii_uppercase) + DE36 = dict([(c, i) for i, c in enumerate(EN36)]) + QUANTUM = 86400.0 / (36**2) + + def __init__(self, whence, separator = None): + self.separator = separator if (separator is not None) else '' + + if isinstance(whence, datetime.datetime): + year_starts = datetime.datetime(whence.year, 1, 1, 0, 0, 0) + adnl_seconds = (whence - year_starts).seconds + + self.dY = whence.year - 2000 + self.dD = (whence - year_starts).days + self.qM = int(adnl_seconds // Quantim.QUANTUM) + + elif isinstance(whence, Quantim): + self.dY = whence.dY + self.dD = whence.dD + self.qM = whence.qM + + elif isinstance(whence, str): + if self.separator: + words = whence.split(self.separator) + if len(words) == 3: + yW = words[0] + dW = words[1] + qW = words[2] + else: + raise Exception("Quantim:__init__ parse error for '{}'".format(whence)) + else: + if len(whence) == 8: + yW = whence[0:3] + dW = whence[3:6] + qW = whence[6:8] + else: + raise Exception("Quantim:__init__ parse error for '{}'".format(whence)) + + self.dY = int(yW) + self.dD = int(dW) + self.qM = (Quantim.DE36[qW[0]] * 36) + Quantim.DE36[qW[1]] + + else: + logger.debug("{} is type {}".format(str(whence), type(whence))) + raise ValueError("Quantim:__init__ cannot create instance from whence '{}'".format(whence)) + + def __str__(self): + lsq = self.qM % 36 + msq = self.qM // 36 + xmap = list(string.digits + string.ascii_uppercase) + yW = "{:03d}".format(self.dY) + dW = "{:03d}".format(self.dD) + qW = xmap[msq] + xmap[lsq] + return self.separator.join([yW, dW, qW]) + + def datetime(self): + y = datetime.datetime(self.dY + 2000, 1, 1, 0, 0, 0) + elapsed_days = self.dD * DAYS + elapsed_seconds = ((self.qM * Quantim.QUANTUM) + (Quantim.QUANTUM / 2)) * SECONDS + return (y + elapsed_days + elapsed_seconds) + + @classmethod + def now(cls): + return cls(datetime.datetime.now()) diff --git a/lib/Bastion/Common.py b/lib/Bastion/Common.py index a96e689..e7d2bb5 100644 --- a/lib/Bastion/Common.py +++ b/lib/Bastion/Common.py @@ -8,6 +8,8 @@ import datetime import string import operator +import random +import pathlib import yaml @@ -128,14 +130,18 @@ def __new__(cls, *args, **kwargs): raise TypeError itype = property(operator.itemgetter(0)) - path = property(operator.itemgetter(1)) + space = property(operator.itemgetter(0)) args = property(operator.itemgetter(2)) + @property + def path(self): + return pathlib.PurePosixPath(self[1]) + def __str__(self): if self.args: - return "[{}:{}?{}]".format(self.itype, self.path, self.args) + return "[{}:{}?{}]".format(self.itype, str(self.path), self.args) else: - return "[{}:{}]".format(self.itype, self.path) + return "[{}:{}]".format(self.itype, str(self.path)) def __repr__(self): return str(self) @@ -143,9 +149,9 @@ def __repr__(self): @property def ref(self): if self.args: - return "{}?{}".format(self.path, self.args) + return "{}?{}".format(str(self.path), self.args) else: - return self.path + return str(self.path) @staticmethod def parse(c): @@ -178,73 +184,6 @@ def __matmul__(self, ns): raise TypeError -class Quantim: - """ - Quantized Time (Quantim). - I represent with ~ 1 minute of precision a date within the 3rd millenium. - (i.e. 2000 - 2999) - The quantized minute is described in base36 (0...9,A...Z). - With two digits, the day is divided into 1,296 quantums, each of which is ~ 66.67 seconds. - """ - - EN36 = list(string.digits + string.ascii_uppercase) - DE36 = dict([(c, i) for i, c in enumerate(EN36)]) - QUANTUM = 86400.0 / (36**2) - - def __init__(self, whence, separator = None): - self.separator = separator if (separator is not None) else '' - - if isinstance(whence, datetime.datetime): - year_starts = datetime.datetime(whence.year, 1, 1, 0, 0, 0) - adnl_seconds = (whence - year_starts).seconds - - self.dY = whence.year - 2000 - self.dD = (whence - year_starts).days - self.qM = int(adnl_seconds // Quantim.QUANTUM) - - elif isinstance(whence, str): - if self.separator: - words = whence.split(self.separator) - if len(words) == 3: - yW = words[0] - dW = words[1] - qW = words[2] - else: - raise Exception("Quantim:__init__ parse error for '{}'".format(whence)) - else: - if len(whence) == 8: - yW = whence[0:3] - dW = whence[3:6] - qW = whence[6:8] - else: - raise Exception("Quantim:__init__ parse error for '{}'".format(whence)) - - self.dY = int(yW) - self.dD = int(dW) - self.qM = (Quantim.DE36[qW[0]] * 36) + Quantim.DE36[qW[1]] - - else: - raise ValueError("Quantim:__init__ cannot create instance from whence '{}'".format(whence)) - - def __str__(self): - lsq = self.qM % 36 - msq = self.qM // 36 - xmap = list(string.digits + string.ascii_uppercase) - yW = "{:03d}".format(self.dY) - dW = "{:03d}".format(self.dD) - qW = xmap[msq] + xmap[lsq] - return self.separator.join([yW, dW, qW]) - - def datetime(self): - y = datetime.datetime(self.dY + 2000, 1, 1, 0, 0, 0) - elapsed_days = self.dD * DAYS - elapsed_seconds = ((self.qM * Quantim.QUANTUM) + (Quantim.QUANTUM / 2)) * SECONDS - return (y + elapsed_days + elapsed_seconds) - - @classmethod - def now(cls): - return cls(datetime.datetime.now()) - def Slug40(text): """ @@ -291,3 +230,60 @@ def fromJSON(cls, js, **kwargs): def fromYAML(cls, ydoc, **kwargs): jdn = yaml.safe_load(ydoc) return cls.fromJDN(jdn, **kwargs) + + +#-- Used as pre-defined symbol sets for the Boggle function, below. +BOGGLES = { + "Azed9": string.ascii_uppercase + string.ascii_lowercase + string.digits, + "ALPHA9": string.ascii_uppercase + string.digits, + "ALPHA": string.ascii_uppercase, + "Azed": string.ascii_uppercase + string.ascii_lowercase, + "HEX": string.digits + "ABCDEF", + "hex": string.digits + "abcdef", + "W64": string.ascii_uppercase + string.ascii_lowercase + string.digits + "+!", + "b32": string.ascii_uppercase + "234567" +} + +def Boggle(*args): + """ + I generate a random word of n characters. + My default alphabet is the RFC 4648 Section 6 base32 scheme. + However, there are also named alphabets... + Azed9 -> [A-Z][a-z][0-9] + ALPHA9 -> [A-Z][0-9] + ALPHA -> [A-Z] + Azed -> [A-Z][a-z] + HEX -> [0-9][A-F] + hex -> [0-9][a-f] + base64 -> URL safe base64 alphabet + ===== + Boggle(n) -> produces a random string of length n from the b32 symset. + Boggle( ) -> produces a random string of length 8 generated from the b32 symset. + Boggle(BOGGLE, n) + ...all forms eventually are computed as... + Boggle(n, symset) + """ + if len(args) == 0: + return Boggle(8, BOGGLES["b32"]) + + if len(args) == 1: + x = args[0] + if isinstance(x, int): + return Boggle(x, BOGGLES["b32"]) + elif isinstance(x, str): + return Boggle(8, BOGGLES[x]) + + if len(args) == 2: + x = args[0] + y = args[1] + + if isinstance(x, int) and isinstance(y, str): + symset = y + return "".join([random.choice(symset) for i in range(x)]) + + if isinstance(x, str) and isinstance(y, int): + return Boggle(y, BOGGLES[x]) + + #-- If somehow I got here without, then nothing matched the invocation. + #-- This is an error. + raise ValueError diff --git a/lib/Bastion/Curator.py b/lib/Bastion/Curator.py index f564e3f..e373618 100644 --- a/lib/Bastion/Curator.py +++ b/lib/Bastion/Curator.py @@ -5,7 +5,117 @@ """ import pathlib -from .Common import * +from .Common import * +from .Model import ARK +from .Chronology import Quantim + +class BLOND(canTextify): + """ + BLOb Name and Description + I am a structured name describing a single ark. + An ark is typically a BLOB or file, hence I am usually BLOB name (e.g. in an S3 bucket) or a file name. + """ + def __init__(self, asset, detail, basis = None, when = None): + if isinstance(asset, ARK): + self.badge = asset.badge + elif isinstance(asset, str): + self.badge = asset + else: + raise ValueError + + when = datetime.datetime.utcnow() if when is None else when + + self.when = Quantim(when) + self.detail = detail + self.basis = basis + self.RDN = str(self) + + #-- Automatically create a basis reference for full backups. + if self.detail == 'F': + self.basis = Boggle(3) + + @property + def anchor(self): + """ + I am an alternate attribute name for the anchor reference ID. + """ + return self.basis + + @property + def isAnchor(self): + return (self.detail == 'F') + + @property + def isDifferential(self): + return (self.detail == 'D') + + @staticmethod + def encode(badge, when, detail, basis): + return "{}{}{}{}".format(badge, str(Quantim(when)), detail, basis) + + def __str__(self): + return self.encode(self.badge, self.when, self.detail, self.basis) + + @classmethod + def decode(cls, blond): + ds = Thing(**{ + 'badge': blond[0:8], + 'when': Quantim(blond[8:16]), + 'detail': blond[16], + 'basis': blond[17:20], + }) + return cls(ds.badge, ds.when, ds.detail, ds.basis) + + @classmethod + def fullBackup(cls, ark, **kwargs): + when = kwargs.get('when', datetime.datetime.utcnow()) + basis = kwargs.get('basis', Boggle(3)) + return cls(ark, 'F', basis, when) + + @classmethod + def diffBackup(cls, anchor, **kwargs): + """ + Given a full backup reference (BLOND), I generate a new BLOND for a differential backup. + """ + when = kwargs.get('when', datetime.datetime.utcnow()) + return cls(anchor.badge, 'D', anchor.basis, when) + + + + +class Thread(tuple): + """ + I am all snaps relative to a given anchor. + """ + def head(self): + return self[-1] + + def anchor(self): + return self[0] + + @property + def basis(self): + return self.anchor.basis + + @property + def earliest(self): + return self[0] + + @property + def latest(self): + return self[-1] + + @property + def begins(self): + return self.anchor.when.datetime() + + @property + def ends(self): + return self.head.when.datetime() + + @property + def drift(self): + return self.head.drift #-- Archives > Anchors > Snaps @@ -13,67 +123,120 @@ #-- Each snap is a 2-tuple of (anchor, differential) #-- An "anchor" is a full backup of the dataset. #-- Each blob in the archive is recorded as ... -#-- {slug}-{quantim}-[A|D]{anchor} +#-- {slug}{quantim}[A|D]{anchor} #-- Where {slug} is the Slug40 encoding (a 8 character, base32 word) of the dataset name (relative to the Rz), #-- {quantim} is the 8 character encoding of timestamp using the Quantim method #-- A if the blob is an anchor (full backup) and D if the blob is a differential. #-- {anchor} - is a 3 character random string that cannot conflict with any other anchors currently in the archive. - - -class Archive(canTextify): +class Manifest: """ I represent the chronicled archive of some dataset. I hold a series of "snaps". """ - def __init__(self, asset, **kwargs): - self.asset = asset - self.zone = asset.zone - self.site = asset.zone.site - self.lscat = [ ] #-- list of files that are in this archive. + def __init__(self, asset, items): + self.asset = ARK(asset) + self.badge = self.asset.badge + self._items = None #-- a sorted tuple of BLONDs + self._snaps = None + self._anchors = None #-- a map of anchor layer -> blob name + blonds = [BLOND(item) for item in items] + self._items = tuple(sorted([blond for blond in blonds if (blond.badge == self.badge)], key = lambda b: b.RDN)) + self._anchors = dict([(item.layer, item) for item in self._items if item.isAnchor]) + + def __iter__(self): + return iter(self._items) + + def __len__(self): + return len(self._items) + + @property def snaps(self): - #-- Answers a list of all snaps in my lscat that are for the given asset. - #-- Snaps are ordered chronologically from earliest to lastest. - raise NotImplementedError + if self._snaps is None: + self._snaps = tuple([Snap(item, self.anchor(item)) for item in self]) + return self._snaps + + @property + def earliest(self): + return self.snaps[0] + + @property + def latest(self): + return self.snaps[-1] + + @property + def head(self): + """ + I am the most recent snap. + """ + return self._snaps[-1] - def snap(self, whence): - raise NotImplementedError + def thread(self, ankle): + """ + I am a chronologically sorted (earliest -> latest) tuple of all snaps relative to the given anchor. + """ + if isinstance(ankle, str): + anchor = ankle + elif isinstance(ankle, snap): + anchor = ankle.anchor.basis + elif isinstance(ankle, BLOND): + anchor = ankle.basis + return Thread([snap for snap in self.snaps if snap.basis == anchor]) - def anchor(self, snap): + def anchor(self, item): """ - I answer the snap that is the anchor layer for the given snap. + I answer the blond to the anchor for the given item. + .anchor(item:BLOND) + .anchor(item:str) + .anchor(item:Snap) """ - raise NotImplementedError + if isinstance(item, BLOND): + return self._anchors[item.anchor] + if isinstance(item, str): + return self.anchor( BLOND.decode(item) ) + if isinstance(item, Snap): + return item.anchor + raise ValueError + + @property + def anchors(self): + """ + I answer a chronologically (earliest -> latest) tuple of BLONDs for my anchor items. + """ + return tuple(sorted(self._anchors.values, key = lambda item: item.RDN)) - def toJDN(self, **kwargs): - jdn = { - '_type': "Bastion.Curator.Archive", - 'site': self.site, - 'zone': self.zone, - 'resource': self.resource, - } - return jdn class Snap: + """ + A snap is a pair of blonds (differential, anchor) + In the case that the snap is an anchor then (anchor, anchor) + """ + def __init__(self, head, anchor): + self.head = head + self.anchor = anchor + + @property + def basis(self): + return self.anchor.basis + def age(self, whence = None): """ I answer a datetime timedelta (elapsed time) between whence and the encoded datetime of this snap. If no "whence" is explicitly given, I assume the current UTC time. """ whence = whence if whence is not None else datetime.datetime.utcnow() - return (whence - self.when.datetime()) + return (whence - self.head.when.datetime()) - @staticmethod - def parse(path): - path = pathlib.PurePath(path) - return Thing(**{ - 'slug': path.stem[0:8], - 'when': Quantim(path.stem[8:16]), - 'layer': path.stem[16], - 'anchor': path.stem[17:20], - }) + @property + def drift(self): + #-- elapsed time between head and its anchor. + return (self.head.when.datetime() - self.anchor.when.datetime()) - @staticmethod - def dub(slug, when, layer, anchor): - return "{}{}{}{}".format(slug, str(Quantim(when)), layer, anchor) + @property + def isAnchor(self): + return (self.head.isAnchor and (self.head == self.anchor)) + + @property + def isDifferential(self): + return self.head.isDifferential diff --git a/lib/Bastion/HPSS.py b/lib/Bastion/HPSS.py index 45e86af..9aadd4a 100644 --- a/lib/Bastion/HPSS.py +++ b/lib/Bastion/HPSS.py @@ -4,9 +4,11 @@ import operator import datetime import json +import socket from Bastion.Common import Thing, Unknown - +import Bastion.Model +from Bastion.Curator import BLOND @@ -211,16 +213,14 @@ def __delitem__(self, path): class HSI: def __init__(self, xpath = None, **kwargs): - self.xpath = pathlib.Path("/usr/local/bin/hsi") - self.login = os.getlogin() - self.proc = None + self.xpath = pathlib.Path( kwargs.get('xpath', "/usr/local/bin/hsi") ) + self.login = kwargs.get('login', os.getlogin()) + self.keytab = pathlib.Path(kwargs.get('keytab', '~/.private/hpss.unix.keytab')).expanduser() + self.procd = None if xpath is not None: self.xpath = pathlib.Path(xpath) - if 'login' in kwargs: - self.login = kwargs['login'] - self.annotations = AnnotationSkill(self) def do(self, command): @@ -228,6 +228,11 @@ def do(self, command): self.procd = subprocess.run(comargs, capture_output = True, check = True) return self.procd + def mkdirs(self, path): + request = "mkdir -p {}".format( str(path) ) + procd = self.do(request) + return True + def statx(self, target): """ Answers a slightly different set of stats for the given path. @@ -296,8 +301,11 @@ def ls(self, path = None): #-------------------------- lines = [line.strip() for line in procd.stdout.decode('utf-8').split('\n')] lines = [line for line in lines if len(line) > 0] - - entries = [fstat.parsed_from_ls_entry(line) for line in lines[1:]] + if lines[0][:3] == "***": + #-- This is likely an error condition; e.g. the requested path doesn't exist. + entries = None + else: + entries = [fstat.from_ls_entry(line) for line in lines[1:]] return entries @@ -309,5 +317,118 @@ def lsx(self, path = None): +class Vault(Bastion.Model.Vault): + def __init__(self, name, **kwargs): + self.name = name + self.host = kwargs.get('host', socket.gethostname()) + self.login = kwargs.get('login', os.getlogin()) + self.keytab = pathlib.Path( kwargs.get('keytab', "~/.private/hpss.unix.keytab") ).expanduser() + self.root = kwargs.get('root', None) + self.hpath = pathlib.Path( kwargs.get('hpath', '/opt/hsi') ) + self.xpath = pathlib.Path( kwargs.get('xpath', (self.hpath / 'bin' / 'hsi')) ) + self.xhtar = self.hpath / 'bin' / 'htar' + self._hsi = None + + def configured(self, conf): + confkey = "vaults.{}".format(self.name) + if confkey in conf: + section = conf[confkey] + self.host = section['host'] + self.login = section['login'] + if 'root' in section: + self.root = pathlib.PurePosixPath( section['root'] ) + return self + + @property + def hsi(self): + if self._hsi is None: + self._hsi = HSI(xpath = self.xpath, login = self.login, keytab = self.keytab) + return self._hsi + + @property + def sites(self): + #-- sites are top level elements relative to the root of the vault. + fls = self.hsi.ls(self.root) + return tuple(sorted([fl.path.name for fl in fls if fl.isdir()])) + + @property + def ARKs(self): + arks = [ ] + for site in self.sites: + for zone in self.zones(site): + for asset in self.assets(site, zone): + arks.append( ARK(site, zone, asset) ) + return tuple(sorted(arks)) + + def zones(self, site): + #-- a zone will be a subdirectory (subfolder) of the given site. + #-- look for all of the subfolders of root / site + fls = self.hsi.ls(self.root / site) + return tuple(sorted([fl.path.name for fl in fls if fl.isdir()])) + + def assets(self, site, zone): + #-- assets will be subdirectories (subfolders) of a given site, zone. + fls = self.hsi.ls(self.root / site / zone) + if fls is not None: + assets = tuple(sorted([fl.path.name for fl in fls if fl.isdir()])) + else: + assets = tuple( ) + return assets + + def manifest(self, *args): + """ + I answer a manifest of the named asset... + manifest(ark) + manifest(site, zone, asset) + """ + if len(args) == 1: + ark = args[0] + elif len(args) == 3: + site, zone, asset = args + ark = ARK(site, zone, asset) + else: + raise ValueError + + #-- The contents of {root}/{site}/{zone}/{asset} are backup blobs + #-- Each name in this folder is a "BLOND" (BLOb Name and Descriptor) + #-- The manifest a catalog of all of the backup objects for the asset. + fls = self.hsi.ls(self.root / ark.site / ark.zone / ark.asset) + blonds = [fl.path.name for fl in fls] + manifest = Bastion.Curator.Manifest(ark, blonds) + + return manifest + + def provision(self, *args): + if len(args) == 1: + ark = args[0] + self.hsi.mkdirs(self.root / ark.site / ark.zone / ark.asset) + elif len(args) == 3: + site, zone, asset = args + ark = ARK(site, zone, asset) + self.provision(ark) + else: + raise ValueError + + def htar(self, asset, **kwargs): + detail = kwargs.get('detail', 'F') + localf = asset.path + ark = asset.ARK + + opts = { + 'htar': self.xhtar, + 'site': ark.site, + 'zone': ark.zone, + 'asset': ark.asset, + 'blond': BLOND(ark, detail), + 'localf': localf + } + + comargs = [opts['htar'], "-Hverify=1", "-cfvf", "{site}/{zone}/{asset}/{blond}".format(**opts), "{localf}".format(**opts)] + proc = subprocess.run(comargs, capture_output = True, check = True) + stdout = proc.stdout.decode('utf-8') + stderr = proc.stderr.decode('utf-8') + flag = True if (proc.returncode == 0) else False + return (flag, stdout, stderr) + -hsi = HSI("/opt/hsi/bin/hsi", login = "ndenny") +#hsi = HSI("/opt/hsi/bin/hsi", login = "ndenny") diff --git a/lib/Bastion/Model.py b/lib/Bastion/Model.py new file mode 100644 index 0000000..c8ac9b0 --- /dev/null +++ b/lib/Bastion/Model.py @@ -0,0 +1,107 @@ +""" +Bastion.Model +""" +import pathlib + +from .Common import RDN, CURIE, Slug40 + + + +class ARK(tuple): + """ + Asset Resource Key (not to be confused with the librarian's definition of ARK!) + I am a triple of the form (site.name, zone.name, asset) + I have a canonical representation as a CURIE. + """ + def __new__(cls, *args): + if len(args) == 1: + arg = args[0] + if isinstance(arg, ARK): + return arg + + if isinstance(arg, CURIE): + site = arg.space + if site[0] == '@': + site = site[1:] + zone = arg.path.parts[0] + asset = arg.path.relative_to( pathlib.PurePosixPath(zone) ) + return ARK(site, zone, asset) + + elif isinstance(arg, str): + return ARK(CURIE(arg)) + + if len(args) == 3: + site, zone, asset = args + s = RDN(site) + z = RDN(zone) + a = pathlib.PurePosixPath(asset) + st = s if (s[0] == '@') else "@{}".format(s) + return tuple.__new__(cls, [st, z, a]) + + raise ValueError + + def __str__(self): + return str(self.CURIE) + + @property + def site(self): + return self[0][1:] + + @property + def zone(self): + return self[1] + + @property + def asset(self): + return self[2] + + @property + def zolo(self): + return pathlib.PurePosixPath(self.zone) / self.asset + + @property + def CURIE(self): + return CURIE("@{}".format(self.site), self.zolo) + + @property + def badge(self): + """ + I am a compact (40-bit) hash constructed from my CURIE (compact URI) + """ + return Slug40(str(self.CURIE)) + + + +class Vault: + """ + I am the base class for all storage vaults. + """ + + @property + def sites(self): + """ + I am the set of all sites tracked by this vault. + """ + raise NotImplementedError + + @property + def ARKs(self): + raise NotImplementedError + + def zones(self, site): + """ + I answer the set of zones for a given site. + """ + raise NotImplementedError + + def assets(self, zone): + """ + I am the set of all assets tracked by this vault. + """ + raise NotImplementedError + + def manifest(self, asset): + """ + Given an asset, I answer the manifest of held objects. + """ + raise NotImplementedError diff --git a/lib/Bastion/Site.py b/lib/Bastion/Site.py index 4eb9cd1..913b9d6 100644 --- a/lib/Bastion/Site.py +++ b/lib/Bastion/Site.py @@ -7,6 +7,7 @@ from .Common import * from .Condo import CxNode +from .Model import ARK #from .Curator import Asset logger = logging.getLogger(__name__) @@ -78,6 +79,7 @@ def __init__(self, name): self._zones = { } self._catalogs = { } + self._configured = False def assets(self, zone): k = RDN(zone) @@ -100,36 +102,38 @@ def RDN(self): return self.name def configured(self, conf): - if conf: - condex = conf['sites'][self.name] - if 'logging' in condex: - self.logging.level = condex.get(asLogLevel, 'logging.level', logging.WARN) - - if 'policy' in condex: - self.policy = RetentionPolicy(condex['policy']) - - if 'zones' in condex: - for zkey, zspec in condex['zones']: - zname = str(zkey) - logger.info("associationg (resource) zone {} to site {}".format(zname, self.name)) - logger.debug("self.zones is type {}".format(type(self.zones))) - self._zones[zname] = Zone(self, zname).configured( zspec ) - - aspecs = conf.get("assets.{}".format(self.name), None) - if aspecs is not None: - for zname in aspecs.keys: - logger.info("reading assets for resource zone {} in site {}".format(zname, self.name)) - zone = self.zone(zname) - for aspec in aspecs[zname]: - #-- Short form.... - if entity(aspec).isString: + if not self._configured: + if conf: + self._configured = True + condex = conf['sites'][self.name] + if 'logging' in condex: + self.logging.level = condex.get(asLogLevel, 'logging.level', logging.WARN) + + if 'policy' in condex: + self.policy = RetentionPolicy(condex['policy']) + + if 'zones' in condex: + for zkey, zspec in condex['zones']: + zname = str(zkey) + logger.info("associationg (resource) zone {} to site {}".format(zname, self.name)) + logger.debug("self.zones is type {}".format(type(self.zones))) + self._zones[zname] = Zone(self, zname).configured( zspec ) + + aspecs = conf.get("assets.{}".format(self.name), None) + if aspecs is not None: + for zname in aspecs.keys: + logger.info("reading assets for resource zone {} in site {}".format(zname, self.name)) + zone = self.zone(zname) + for aspec in aspecs[zname]: #-- Short form.... - zone.assets.add( aspec ) - logger.debug("added asset {} to zone {} by short form description".format(aspec, zname)) - else: - #-- Long form ... - zone.assets.add( Asset(zone, aspec) ) - logger.debug("added asset {} to zone {} by long form description".format(aspec, zname)) + if entity(aspec).isString: + #-- Short form.... + zone.assets.add( aspec ) + logger.debug("added asset {} to zone {} by short form description".format(aspec, zname)) + else: + #-- Long form ... + zone.assets.add( Asset(zone, aspec) ) + logger.debug("added asset {} to zone {} by long form description".format(aspec, zname)) return self @@ -141,22 +145,27 @@ def resources(self, zone): return self._catalogs[zname] - def asset(self, slug): + def asset(self, ark): """ - Will search through all zones to locate the asset identified by the given slug. - Raises an error if there are two or more zones that containe the same slug (i.e. a hash collision). + Will search through all zones to locate the asset identified by the given search argument: q. + .asset(q:ARK) """ - zoned = None - for zone in self.zones: - if slug in zone.assets: - if zoned is not None: - raise Exception("Site.asset - multiple zones ({}, {], etc.) claim asset {}".format(zone.name, zoned.name, slug)) - else: - zoned = zone - if zoned: - return zoned.assets[slug] - else: - return None + if (ark.site == self.name): + zone = self.zone(ark.zone) + return zone.assets.named(str(ark.asset)) + return None + +# zoned = None +# for zone in self.zones: +# if slug in zone.assets: +# if zoned is not None: +# raise Exception("Site.asset - multiple zones ({}, {], etc.) claim asset {}".format(zone.name, zoned.name, slug)) +# else: +# zoned = zone +# if zoned: +# return zoned.assets[slug] +# else: +# return None class RetentionPolicy(canConfigure, canTextify): @@ -255,6 +264,11 @@ def __div__(self, name): class Asset(canTextify): def __init__(self, zone, *args, **kwargs): + """ + Asset(zone, name) + Asset(zone, conf:CxNode) + Asset(zone, conf:dict) + """ self.zone = zone self.name = None self.about = None @@ -321,19 +335,27 @@ def CURIE(self): """ My CURIE (Compact URI) form is [{site}:{zone}/{asset}] """ - return CURIE(self.zone.site.name, str(pathlib.PurePath(self.zone.name) / self.name)) + return self.ARK.CURIE + + @property + def ARK(self): + return ARK(self.zone.site.name, self.zone.name, self.name) @property def badge(self): """ I am a compact (40-bit) hash constructed from my CURIE (compact URI) """ - return Slug40(str(self.CURIE)) + return self.ARK.badge + + @property + def zolo(self): + return self.ARK.zolo def toJDN(self, **kwargs): jdn = { - 'zone': "{}:{}".format(self.zone.site.name, self.zone.name), - 'path': str(self.path), + 'zone': "@{}:{}".format(self.zone.site.name, self.zone.name), + 'path': str(self.name), } if self.about: jdn['about'] = self.about @@ -379,8 +401,9 @@ def __init__(self, context, *args): else: raise Exception("AssetCatalog.__init__ - I don't know how to construct the requested AssetCatalog instance") - self.xRDNs = { } - self.xnames = { } + self.xRDNs = { } + self.xnames = { } + self._sorted = None @property def any(self): @@ -400,9 +423,21 @@ def add(self, obj): if asset.RDN not in self.xRDNs: self.xRDNs[asset.RDN] = asset self.xnames[asset.name] = asset + self._sorted = None else: raise Exception("duplicate asset RDN added!") + def __getitem__(self, x): + if isinstance(x, int): + if self._sorted is None: + self._sorted = sorted(list(self), key = lambda asset: asset.name) + return self._sorted[x] + else: + if x in self.xRDNs: + return self.xRDNs[x] + else: + return None + def __contains__(self, slug): return (slug in self.xRDNs) @@ -411,10 +446,13 @@ def update(self, asset): self.xnames[asset.name] = asset def named(self, name): - return self.xnames[name] + if name in self.xnames: + return self.xnames[name] + else: + return None - def __getitem__(self, slug): - return self.xRDNs[slug] + def __len__(self): + return len(self.xRDNs) def __iter__(self): return iter(sorted(self.xRDNs.values(), key = lambda x: str(x.path)))